{-|
Module      : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}

{-# LANGUAGE ViewPatterns      #-}

module Gargantext.Core.Viz.Phylo.Aggregates
  where

import Control.Parallel.Strategies

import Gargantext.Prelude hiding  (elem)
import Gargantext.Core.Text.Context    (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Core.Text.Terms.Mono (monoTexts)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools

import Debug.Trace (trace)

import Data.List    (partition, concat, nub, elem, sort, (++), null, union)
import Data.Map     (Map, fromList, fromListWith, adjust, filterWithKey, elems, keys, unionWith, mapWithKey)
import Data.Set     (size)
import Data.Text    (Text, unwords)
import Data.Vector  (Vector)

import qualified Data.Map as Map
import qualified Data.Set as Set

import qualified Data.Vector as Vector



---------------------
-- | Foundations | --
---------------------

-- | Extract all the labels of a termList
termListToNgrams :: TermList -> [Ngrams]
termListToNgrams = map (\(lbl,_) -> unwords lbl)


-------------------
-- | Documents | --
-------------------

-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _   [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = 
  let periods  = map (inPeriode f es) pds
      periods' = periods `using` parList rdeepseq

  in  trace ("----\nGroup docs by periods\n") $ fromList $ zip pds periods'
  where
    --------------------------------------
    inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
    inPeriode f' h (start,end) =
      fst $ partition (\d -> f' d >= start && f' d <= end) h
    --------------------------------------    


-- | To parse a list of Documents by filtering on a Vector of Ngrams
parseDocs :: Vector Ngrams -> [(Date,Text)] -> [Document]
parseDocs roots c = map (\(d,t)
                -> Document d ( filter (\x -> Vector.elem x roots)
                              $ monoTexts t)) c

-- | To count the number of documents by year
countDocs :: [(Date,a)] -> Map Date Double
countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus


-----------------
-- | Periods | --
-----------------


-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
initPeriods g s (start,end) = map (\l -> (head' "initPeriods" l, last' "initPeriods" l))
                            $ chunkAlong g s [start .. end]


--------------
-- | Cooc | --
--------------


-- | To transform a tuple of group's information into a coocurency Matrix
toCooc :: [([Int],Double)] -> Map (Int, Int) Double
toCooc l = map (/docs)
         $ foldl (\mem x -> adjust (+1) x mem) cooc
         $ concat
         $ map (\x -> listToFullCombi $ fst x) l
  where
    --------------------------------------
    idx :: [Int]
    idx = nub $ concat $ map fst l
    --------------------------------------
    docs :: Double
    docs = sum $ map snd l
    --------------------------------------
    cooc :: Map (Int, Int) (Double)
    cooc = fromList $ map (\x -> (x,0)) $ listToFullCombi idx
    --------------------------------------    


-- | To reduce a coocurency Matrix to some keys
getSubCooc :: [Int] -> Map (Int, Int) Double -> Map (Int, Int) Double
getSubCooc idx cooc = filterWithKey (\k _ -> (elem (fst k) idx)
                                          && (elem (snd k) idx)) cooc


-- | To get a coocurency Matrix related to a given list of Periods
getCooc :: [PhyloPeriodId] -> Phylo -> Map (Int, Int) Double
getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)) gs
  where
    --------------------------------------
    -- | Here we need to go back to the level 1 (aka : the Fis level)
    gs :: [PhyloGroup]
    gs = filter (\g -> elem (getGroupPeriod g) prds ) $ getGroupsWithLevel 1 p
    -------------------------------------- 


-- | To transform a list of index into a cooc matrix 
listToCooc :: [Int] -> Map (Int,Int) Double
listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst


-- | To build the cooc matrix by years out of the corpus
docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
docsToCooc docs fdt = fromListWith sumCooc 
                    $ map (\(d,l) -> (d, listToCooc l))
                    $ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs


-------------
-- | Fis | --
-------------


-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFis keep thr f m = case keep of
  False -> map (\l -> f thr l) m
  True  -> map (\l -> keepFilled (f) thr l) m


-- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
filterFisBySupport thr l = filter (\fis -> getSupport fis >= thr) l


-- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l


-- | To find if l' is nested in l
isNested :: Eq a => [a] -> [a] -> Bool
isNested l l'
  | null l'               = True
  | length l' > length l  = False
  | (union  l l') == l    = True
  | otherwise             = False 


-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNested m = 
  let fis  = map (\l -> 
                foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ getClique f') (Set.toList $ getClique f)) mem)
                                 then mem
                                 else 
                                    let fMax = filter (\f' -> not $ isNested (Set.toList $ getClique f) (Set.toList $ getClique f')) mem
                                    in  fMax ++ [f] ) [] l)
           $ elems m 
      fis' = fis `using` parList rdeepseq
  in  fromList $ zip (keys m) fis' 


-- | Choose if we use a set of Fis from a file or if we have to create them
docsToFis :: Map (Date,Date) [Document] -> Phylo -> Map (Date, Date) [PhyloFis]
docsToFis m p = if (null $ getPhyloFis p)
                 then trace("----\nRebuild the Fis from scratch\n") 
                    $ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
                                                              in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
                 else trace("----\nUse Fis from an existing file\n") 
                    $ unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m) (getPhyloFis p)


-- | Process some filters on top of a set of Fis
refineFis :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
refineFis fis k s t = traceFis "----\nFiltered Fis by nested :\n"
                      $ filterFisByNested 
                      $ traceFis "----\nFiltered Fis by clique size :\n"                      
                      $ filterFis k t (filterFisByClique)
                      $ traceFis "----\nFiltered Fis by support :\n"
                      $ filterFis k s (filterFisBySupport)
                      $ traceFis "----\nUnfiltered Fis :\n" fis


-----------------
-- | Tracers | --
-----------------


traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] 
traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <> " Fis\n"
                            <> "support : " <> show (countSup 1 supps) <> " (>1) "
                                            <> show (countSup 2 supps) <> " (>2) "
                                            <> show (countSup 3 supps) <> " (>3) "
                                            <> show (countSup 4 supps) <> " (>4) "
                                            <> show (countSup 5 supps) <> " (>5) "
                                            <> show (countSup 6 supps) <> " (>6)\n"                                                                                                                                          
                            <> "clique size : " <> show (countSup 1 ngrms) <> " (>1) "
                                                <> show (countSup 2 ngrms) <> " (>2) "
                                                <> show (countSup 3 ngrms) <> " (>3) "
                                                <> show (countSup 4 ngrms) <> " (>4) "
                                                <> show (countSup 5 ngrms) <> " (>5) "
                                                <> show (countSup 6 ngrms) <> " (>6)\n"                                                                                             
                            ) m
  where
    --------------------------------------
    countSup :: Double -> [Double] -> Int
    countSup s l = length $ filter (>s) l 
    --------------------------------------
    supps :: [Double]
    supps = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems m
    --------------------------------------
    ngrms :: [Double]
    ngrms = sort $ map (\f -> fromIntegral $ size $ _phyloFis_clique f) $ concat $ elems m
    --------------------------------------