{-|
Module      : Gargantext.TextFlow
Description : Server API
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

From text to viz, all the flow of texts in Gargantext.

-}

{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# LANGUAGE NoImplicitPrelude           #-}

module Gargantext.TextFlow
  where

import qualified Data.Text as T
import Data.Text.IO (readFile)

import Control.Arrow ((***))
import Data.Map.Strict (Map)
import qualified Data.Array.Accelerate as A
import qualified Data.Map.Strict as M
import qualified Data.List       as L
import Data.Tuple.Extra (both)
----------------------------------------------
import Gargantext.Core (Lang(FR))
import Gargantext.Core.Types (Label)
import Gargantext.Prelude
import Prelude (print, seq)

import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, map2mat, mat2map)
import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional, distributional)
import Gargantext.Viz.Graph.Index (Index)
import Gargantext.Viz.Graph (Graph(..), Node(..), Edge(..), Attributes(..), TypeNode(..))
import Gargantext.Text.Metrics.Count (cooc)
import Gargantext.Text.Metrics
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))

import Gargantext.Text.Parsers.CSV

import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain, LouvainNode(..))


{-
  ____                             _            _
 / ___| __ _ _ __ __ _  __ _ _ __ | |_ _____  _| |_
| |  _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
| |_| | (_| | | | (_| | (_| | | | | ||  __/>  <| |_
 \____|\__,_|_|  \__, |\__,_|_| |_|\__\___/_/\_\\__|
                 |___/

-}

printDebug msg x = putStrLn $ msg <> " " <> show x
--printDebug _ _ = pure ()

data TextFlow = CSV | FullText

-- workflow :: Lang (EN|FR) -> FilePath -> Graph
textflow termsLang workType path = do
  -- Text  <- IO Text <- FilePath
  contexts <- case workType of
                FullText -> splitBy (Sentences 5) <$> readFile path
                CSV      -> readCsvOn [csv_title, csv_abstract] path

  -- Context :: Text -> [Text]
  -- Contexts = Paragraphs n | Sentences n | Chars n

  myterms <- extractTerms (Mono FR) contexts
  -- TermsType = Mono | Multi | MonoMulti
  -- myterms # filter (\t -> not . elem t stopList)
  --         # groupBy (Stem|GroupList|Ontology)
  printDebug "myterms" (sum $ map length myterms)

  -- Bulding the map list
  -- compute copresences of terms
  -- Cooc = Map (Term, Term) Int
  let myCooc1 = cooc myterms
  printDebug "myCooc1" (M.size myCooc1)

  -- Remove Apax: appears one time only => lighting the matrix
  let myCooc2 = M.filter (>1) myCooc1
  printDebug "myCooc2" (M.size myCooc2)

  -- Filtering terms with inclusion/Exclusion and Specifity/Genericity scores
  let myCooc3 = filterCooc ( FilterConfig (MapListSize   1000 )
                                          (InclusionSize 4000 )
                                          (SampleBins      10 )
                                          (Clusters         3 )
                                          (DefaultValue     0 )
                           ) myCooc2
  printDebug "myCooc3" $ M.size myCooc3

  -- Cooc -> Matrix
  let (ti, fi) = createIndices myCooc3
  printDebug "ti" $ M.size ti

  let myCooc4 = toIndex ti myCooc3
  printDebug "myCooc4" $ M.size myCooc4

  let matCooc = map2mat (0) (M.size ti) myCooc4
  --printDebug "matCooc" matCooc
  -- Matrix -> Clustering
  let distanceMat = conditional matCooc
--  let distanceMat = distributional matCooc
  printDebug "distanceMat" $ A.arrayShape distanceMat
  --printDebug "distanceMat" distanceMat
-- 
  let distanceMap = mat2map distanceMat
  printDebug "distanceMap" $ M.size distanceMap
--{-
--  let distance = fromIndex fi distanceMap
--  printDebug "distance" $ M.size distance
---}
  partitions <- cLouvain distanceMap
------ | Building : -> Graph -> JSON
  printDebug "partitions" $ length partitions
  --printDebug "partitions" partitions
  pure $ data2graph (M.toList ti) myCooc4 distanceMap partitions


-----------------------------------------------------------
-- distance should not be a map since we just "toList" it (same as cLouvain)
data2graph :: [(Label, Int)] -> Map (Int, Int) Int
                             -> Map (Int, Int) Double
                             -> [LouvainNode]
              -> Graph
data2graph labels coocs distance partitions = Graph nodes edges
  where
    community_id_by_node_id = M.fromList [ (n, c) | LouvainNode n c <- partitions ]
    nodes = [ Node { node_size = maybe 0 identity (M.lookup (n,n) coocs)
                   , node_type = Terms -- or Unknown
                   , node_id = cs (show n)
                   , node_label = T.unwords l
                   , node_attributes = 
                     Attributes { clust_default = maybe 0 identity 
                                (M.lookup n community_id_by_node_id) } }
            | (l, n) <- labels ]
    edges = [ Edge { edge_source = cs (show s)
                   , edge_target = cs (show t)
                   , edge_weight = w
                   , edge_id     = cs (show i) }
            | (i, ((s,t), w)) <- zip [0..] (M.toList distance) ]
-----------------------------------------------------------