From 4a4ade7d23921dd160aee499500d01e5371493e1 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alexandre=20Delano=C3=AB?= <devel+git@delanoe.org>
Date: Tue, 29 May 2018 17:32:34 +0200
Subject: [PATCH 01/16] [FEAT] Cooc -> Matrix conversions tools.

---
 package.yaml                                  |   1 +
 src/Gargantext/Pipeline.hs                    |  25 +++-
 src/Gargantext/Prelude.hs                     |  12 ++
 src/Gargantext/Text/Metrics/Occurrences.hs    |  59 ++++++---
 src/Gargantext/Text/Terms.hs                  |   2 +
 src/Gargantext/Text/Terms/Lists.hs            |   1 -
 src/Gargantext/Viz/Graph/Distances.hs         |  25 ++++
 src/Gargantext/Viz/Graph/Distances/Matrice.hs |  21 +++-
 src/Gargantext/Viz/Graph/Index.hs             | 114 ++++++++++++++++++
 src/Gargantext/Viz/Graph/Utils.hs             |  95 ++-------------
 stack.yaml                                    |   3 +-
 11 files changed, 241 insertions(+), 117 deletions(-)
 create mode 100644 src/Gargantext/Viz/Graph/Distances.hs
 create mode 100644 src/Gargantext/Viz/Graph/Index.hs

diff --git a/package.yaml b/package.yaml
index e7884f03..2de9e6a0 100644
--- a/package.yaml
+++ b/package.yaml
@@ -35,6 +35,7 @@ library:
   dependencies:
   - QuickCheck
   - accelerate
+  - accelerate-io
   - aeson
   - aeson-lens
   - aeson-pretty
diff --git a/src/Gargantext/Pipeline.hs b/src/Gargantext/Pipeline.hs
index f5b52066..ba74e8ee 100644
--- a/src/Gargantext/Pipeline.hs
+++ b/src/Gargantext/Pipeline.hs
@@ -10,29 +10,44 @@ Portability : POSIX
 -}
 
 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
-
-{-# LANGUAGE NoImplicitPrelude    #-}
+{-# LANGUAGE NoImplicitPrelude           #-}
 
 module Gargantext.Pipeline
   where
 
+import Data.Text (unpack)
+import qualified Data.Text as DT
 
 import Data.Text.IO (readFile)
 
+----------------------------------------------
+----------------------------------------------
+
 import Gargantext.Core
+import Gargantext.Core.Types
 import Gargantext.Prelude
 
+import Gargantext.Viz.Graph.Index (map', createIndexes)
+import Gargantext.Viz.Graph.Distances.Matrice (distributional, int2double)
 import Gargantext.Text.Metrics.Occurrences
 import Gargantext.Text.Terms
 import Gargantext.Text.Context
 
+import Data.Array.Accelerate as A
 
 pipeline pth = do
   text     <- readFile pth
   let contexts = splitBy Sentences 4 text
-  cooc <$> map occurrences <$> mapM (terms Mono FR) contexts
-  -- todo
+  myterms <- mapM (terms Multi FR) contexts
+  -- todo filter stop words
+  let myCooc = removeApax $ cooc myterms
+  --pure myCooc
   -- Cooc map -> Matrix
-  -- distributional or conditional
+  --pure $ createIndexes myCooc
+  pure $ map' int2double myCooc
   -- Matrix -> Graph
 
+
+
+
+
diff --git a/src/Gargantext/Prelude.hs b/src/Gargantext/Prelude.hs
index 29083ec9..4cad2f18 100644
--- a/src/Gargantext/Prelude.hs
+++ b/src/Gargantext/Prelude.hs
@@ -49,6 +49,7 @@ import Protolude ( Bool(True, False), Int, Double, Integer
                  , otherwise, when
                  , undefined
                  , IO()
+                 , compare
                  )
 
 -- TODO import functions optimized in Utils.Count
@@ -106,6 +107,12 @@ movingAverage steps xs = map mean $ chunkAlong steps 1 xs
 ma :: [Double] -> [Double]
 ma = movingAverage 3
 
+-- | splitEvery n == chunkAlong n n
+splitEvery :: Int -> [a] -> [[a]]
+splitEvery _ [] = L.cycle [[]]
+splitEvery n xs =
+  let (h,t) = L.splitAt n xs
+  in  h : splitEvery n t
 
 -- | Function to split a range into chunks
 chunkAlong :: Int -> Int -> [a] -> [[a]]
@@ -227,3 +234,8 @@ zipSnd f xs = zip xs (f xs)
 unMaybe :: [Maybe a] -> [a]
 unMaybe = map fromJust . L.filter isJust
 
+-- maximumWith
+maximumWith f = L.maximumBy (\x y -> compare (f x) (f y))
+
+
+
diff --git a/src/Gargantext/Text/Metrics/Occurrences.hs b/src/Gargantext/Text/Metrics/Occurrences.hs
index d4e24b33..00a0f429 100644
--- a/src/Gargantext/Text/Metrics/Occurrences.hs
+++ b/src/Gargantext/Text/Metrics/Occurrences.hs
@@ -29,12 +29,17 @@ module Gargantext.Text.Metrics.Occurrences
   where
 
 
+import Control.Arrow ((***))
+import qualified Data.List as List
 import Data.Map.Strict  (Map
-                        , empty
+                        , empty, singleton
                         , insertWith, insertWithKey, unionWith
-                        , toList
+                        , toList, lookup, mapKeys
                         )
 import Data.Set (Set)
+import qualified Data.Set as Set
+import Data.Text (pack)
+
 import qualified Data.Map.Strict as DMS
 import Control.Monad ((>>),(>>=))
 import Data.String (String())
@@ -53,6 +58,7 @@ data Group = ByStem | ByOntology
 type Grouped = Stems
 
 
+{-
 -- >> let testData = ["blue lagoon", "blues lagoon", "red lagoon"]
 -- >> map occurrences <$> Prelude.mapM (terms Mono EN) 
 -- [fromList [(fromList ["blue"],1),(fromList ["lagoon"],1)],fromList [(fromList ["blue"],1),(fromList ["lagoon"],1)],fromList [(fromList ["lagoon"],1),(fromList ["red"],1)]]
@@ -66,32 +72,45 @@ type Grouped = Stems
 --fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
 ---- 
 
-cooc :: (Ord b, Num a) => [Map b a] -> Map (b, b) a
-cooc ts = cooc' $ map cooc'' ts
+           -}
+
+type Occs = Int
+type Coocs = Int
 
-cooc' :: (Ord b, Num a) => [Map (b, b) a] -> Map (b,b) a
-cooc' = foldl' (\x y -> unionWith (+) x y) empty
+removeApax :: Map (Label, Label) Int -> Map (Label, Label) Int
+removeApax = DMS.filter (> 1)
 
-cooc'' :: (Ord b, Num a) => Map b a -> Map (b, b) a
-cooc'' m = foldl' (\x (y,c) -> insertWith (+) y c x) empty xs
+cooc :: [[Terms]] -> Map (Label, Label) Int
+cooc tss =
+  mapKeys (delta $ labelPolicy terms_occs) $ cooc' (map (Set.fromList . map _terms_stem) tss)
   where
-      xs =[ ((x'',y''), c') | x' <- toList m
-                            , y' <- toList m
-                            , let x'' = fst x'
-                            , let y'' = fst y'
-                            , x'' < y''
-                            , let c' = 1
-                            --, let c' = snd x' + snd y'
-                            ]
+    terms_occs = occurrences (List.concat tss)
+    delta f = f *** f
+
+
+labelPolicy :: Map Grouped (Map Terms Occs) -> Grouped -> Label
+labelPolicy m g =  case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList <$> lookup g m of
+                     Just label -> label
+                     Nothing    -> panic $ "Label of Grouped not found: " <> (pack $ show g)
+
+cooc' :: Ord b => [Set b] -> Map (b, b) Coocs
+cooc' tss = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
+  where
+      xs = [ ((x, y), 1)
+           | xs <- tss
+           , ys <- tss
+           , x <- Set.toList xs
+           , y <- Set.toList ys
+           , x < y
+           ]
 
 
 -- | Compute the grouped occurrences (occ)
-occurrences :: [Terms] -> Map Grouped Int
+occurrences :: [Terms] -> Map Grouped (Map Terms Int)
 occurrences = occurrences' _terms_stem
 
-occurrences' :: Ord b => (a -> b) -> [a] -> Occ b
-occurrences' f xs = foldl' (\x y -> insertWith (+) (f y) 1 x) empty xs
-
+occurrences' :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
+occurrences' f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty
 
 -- TODO add groups and filter stops
 sumOcc :: Ord a => [Occ a] -> Occ a
diff --git a/src/Gargantext/Text/Terms.hs b/src/Gargantext/Text/Terms.hs
index 68dbb067..f19496f2 100644
--- a/src/Gargantext/Text/Terms.hs
+++ b/src/Gargantext/Text/Terms.hs
@@ -43,6 +43,8 @@ import Gargantext.Text.Terms.Mono  (monoterms')
 
 data TermType = Mono | Multi
 
+-- remove Stop Words
+-- map (filter (\t -> not . elem t)) $ 
 ------------------------------------------------------------------------
 terms :: TermType -> Lang -> Text -> IO [Terms]
 terms Mono  lang txt = pure $ monoterms' lang txt
diff --git a/src/Gargantext/Text/Terms/Lists.hs b/src/Gargantext/Text/Terms/Lists.hs
index bc7c1e4d..9b4f10ce 100644
--- a/src/Gargantext/Text/Terms/Lists.hs
+++ b/src/Gargantext/Text/Terms/Lists.hs
@@ -35,4 +35,3 @@ data ListName = Stop | Candidate | Graph
 --stop :: [Ngrams] -> [Ngrams]
 --stop ngs = filter (\ng -> _ngramsListName ng == Just Stop) ngs
 
-
diff --git a/src/Gargantext/Viz/Graph/Distances.hs b/src/Gargantext/Viz/Graph/Distances.hs
new file mode 100644
index 00000000..b10d356e
--- /dev/null
+++ b/src/Gargantext/Viz/Graph/Distances.hs
@@ -0,0 +1,25 @@
+{-|
+Module      : Gargantext.Graph.Distances
+Description : Distance management tools
+Copyright   : (c) CNRS, 2017-Present
+License     : AGPL + CECILL v3
+Maintainer  : team@gargantext.org
+Stability   : experimental
+Portability : POSIX
+
+-}
+
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE FlexibleContexts  #-}
+{-# LANGUAGE Strict            #-}
+
+module Gargantext.Viz.Graph.Distances
+  where
+
+
+
+
+
+
+
+
diff --git a/src/Gargantext/Viz/Graph/Distances/Matrice.hs b/src/Gargantext/Viz/Graph/Distances/Matrice.hs
index f0d03eeb..546dedc8 100644
--- a/src/Gargantext/Viz/Graph/Distances/Matrice.hs
+++ b/src/Gargantext/Viz/Graph/Distances/Matrice.hs
@@ -48,6 +48,13 @@ import Data.Maybe (Maybe(Just))
 import qualified Gargantext.Prelude as P
 import qualified Data.Array.Accelerate.Array.Representation     as Repr
 
+import Gargantext.Text.Metrics.Occurrences
+
+
+-----------------------------------------------------------------------
+-- Test perf.
+distriTest = distributional $ myMat 100
+-----------------------------------------------------------------------
 
 vector :: Int -> (Array (Z :. Int) Int)
 vector n = fromList (Z :. n) [0..n]
@@ -55,14 +62,14 @@ vector n = fromList (Z :. n) [0..n]
 matrix :: Elt c => Int -> [c] -> Matrix c
 matrix n l = fromList (Z :. n :. n) l
 
-myMat :: Int -> Matrix Double
+myMat :: Int -> Matrix Int
 myMat n = matrix n [1..]
 
 -- | Two ways to get the rank (as documentation)
-rank :: (Matrix Double) -> Int
+rank :: (Matrix a) -> Int
 rank m = arrayRank $ arrayShape m
 
-rank' :: (Matrix Double) -> Int
+rank' :: (Matrix a) -> Int
 rank' m = n
   where
     Z :. _ :. n = arrayShape m
@@ -109,8 +116,8 @@ conditional m = (run $ ie (use m), run $ sg (use m))
 
 -- | Distributional Distance
 
-distributional :: Matrix Double -> Matrix Double
-distributional m = run $ filter $ ri (use m)
+distributional :: Matrix Int -> Matrix Double
+distributional m = run $ filter $ ri (map fromIntegral $ use m)
   where
     n    = rank' m
     
@@ -131,3 +138,7 @@ distributional m = run $ filter $ ri (use m)
     crossProduct m = zipWith (*) (cross m  ) (cross (transpose m))
     cross mat      = zipWith (-) (mkSum n mat) (mat)
 
+
+int2double :: Matrix Int -> Matrix Double
+int2double m = run (map fromIntegral $ use m)
+
diff --git a/src/Gargantext/Viz/Graph/Index.hs b/src/Gargantext/Viz/Graph/Index.hs
new file mode 100644
index 00000000..5fe0d3c7
--- /dev/null
+++ b/src/Gargantext/Viz/Graph/Index.hs
@@ -0,0 +1,114 @@
+{-|
+Module      : Gargantext.Graph.Distances.Utils
+Description : Tools to compute distances from Cooccurrences
+Copyright   : (c) CNRS, 2017-Present
+License     : AGPL + CECILL v3
+Maintainer  : team@gargantext.org
+Stability   : experimental
+Portability : POSIX
+
+Basically @compute@ takes an accelerate function as first input, a Map
+of coccurrences as second input and outputs a Map automatically using
+indexes.
+
+TODO:
+--cooc2fgl :: Ord t, Integral n => Map (t, t) n -> Graph
+--fgl2json
+
+-}
+
+{-# LANGUAGE BangPatterns      #-}
+{-# LANGUAGE FlexibleContexts  #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE TypeOperators     #-}
+
+
+module Gargantext.Viz.Graph.Index
+  where
+
+import qualified Data.Array.Accelerate as A
+import qualified Data.Array.Accelerate.IO.Data.Vector.Unboxed as AU
+
+import qualified Data.Vector.Unboxed as DVU
+import Data.List (concat)
+
+import Data.Set (Set)
+import qualified Data.Set as S
+
+import Data.Map (Map)
+import qualified Data.Map.Strict    as M
+
+import Gargantext.Prelude
+
+type Index    = Int
+
+
+-------------------------------------------------------------------------------
+{-
+map'' :: (Ord t) => (A.Matrix Int -> A.Matrix Double)
+                -> Map (t, t) Int
+                -> Map (t, t) Double
+map'' f m = back . f' . from m
+  where
+    from (fs, m') = unzip $ M.toAscList m
+    f'  = f $ A.fromList shape m'
+    shape = (A.Z A.:. n A.:. n)
+    back = M.fromAscList . zip fs . A.toList
+-}
+-------------------------------------------------------------------------------
+map' :: (Ord t) => (A.Matrix Int -> A.Matrix Double)
+                -> Map (t, t) Int
+                -> Map (t, t) Double
+map' f m = fromIndex fromI . mat2cooc . f $ cooc2mat toI m
+  where
+    (toI, fromI) = createIndexes m
+
+map'' m = cooc2mat toI m
+  where
+    (toI, fromI) = createIndexes m
+
+-------------------------------------------------------------------------------
+-------------------------------------------------------------------------------
+cooc2mat :: Ord t => Map t Index -> Map (t, t) Int -> A.Matrix Int
+cooc2mat ti m = A.fromFunction shape (\(A.Z A.:. x A.:. y) -> lookup' x y)
+  where
+    shape = (A.Z A.:. n A.:. n)
+    n = M.size ti
+    lookup' x y = maybe 0 identity (M.lookup (x,y) (toIndex ti m))
+
+mat2cooc :: A.Matrix Double -> Map (Index, Index) Double
+mat2cooc m = M.fromList $ concat                                          --  [((Int,Int), Double)]
+                        $ map (\(x,xs) -> map (\(y,ys) -> ((x,y),ys)) xs) -- [[((Int,Int), Double)]]
+                        $ zip ([1..] :: [Int])                            -- [(Int, [(Int, Double)]]
+                        $ map (zip ([1..] :: [Int]))                      -- [[(Int, Double)]]
+                        $ splitEvery n (A.toList m)                       -- [[Double]]
+  where
+    A.Z A.:. _ A.:. n = A.arrayShape m
+
+-------------------------------------------------------------------------------
+-------------------------------------------------------------------------------
+toIndex :: Ord t => Map t Index -> Map (t,t) a -> Map (Index,Index) a
+toIndex ni ns = indexConversion ni ns
+
+fromIndex :: Ord t => Map Index t -> Map (Index, Index) a -> Map (t,t) a
+fromIndex ni ns = indexConversion ni ns
+---------------------------------------------------------------------------------
+indexConversion :: (Ord b, Ord k) => Map k b -> Map (k,k) a -> Map (b, b) a
+indexConversion index ms = M.fromList $ map (\((k1,k2),c) -> ( ((M.!) index k1, (M.!) index k2), c)) (M.toList ms)
+-------------------------------------------------------------------------------
+-------------------------------------------------------------------------------
+createIndexes :: Ord t => Map (t, t) b -> (Map t Index, Map Index t)
+createIndexes = set2indexes . cooc2set
+  where
+    cooc2set :: Ord t => Map (t, t) a -> Set t
+    cooc2set cs' = foldl' (\s ((t1,t2),_) -> insert [t1,t2] s ) S.empty (M.toList cs')
+      where
+        insert as s = foldl' (\s' t -> S.insert t s') s as
+
+    set2indexes :: Ord t => Set t -> (Map t Index, Map Index t)
+    set2indexes s = (M.fromList toIndex', M.fromList fromIndex')
+      where
+        fromIndex' = zip [1..] (S.toList s)
+        toIndex'   = zip (S.toList s)  [1..]
+
+
diff --git a/src/Gargantext/Viz/Graph/Utils.hs b/src/Gargantext/Viz/Graph/Utils.hs
index 04af0f5c..f91a4b62 100644
--- a/src/Gargantext/Viz/Graph/Utils.hs
+++ b/src/Gargantext/Viz/Graph/Utils.hs
@@ -7,6 +7,8 @@ Maintainer  : team@gargantext.org
 Stability   : experimental
 Portability : POSIX
 
+These functions are used for Vector.Matrix only.
+
 -}
 
 {-# LANGUAGE BangPatterns      #-}
@@ -34,65 +36,16 @@ import Gargantext.Prelude
 ------------------------------------------------------------------------
 -- | Some utils to build the matrix from cooccurrence results
 
-type Distance = Double
-type Cooc     = Int
-type NgramId  = Int
-type Index    = Int
-
--- Type Families
---type Matrix' Index a
---type Matrix' NgramId a
-
-data Matrice a = Matrice { matrice_fromIndex :: !(Map Index NgramId)
-                         , matrice_toIndex   :: !(Map NgramId Index)
-                         , matrice           :: !(Matrix a)
-                         } deriving (Show)
-
---fromMatrice :: Matrice Double -> [(NgramId, NgramId, Double)]
---fromMatrice m = undefined
-
-
-toMatrice :: [(NgramId, NgramId, Int)] -> Matrice Double
-toMatrice ns = Matrice fromIndx toIndx m
-  where
-    s   = cooc2set ns
-    (fromIndx, toIndx) = set2indexes s
-    n   = (length (S.toList s))
-    idx = toIndex toIndx ns
-    m   = matrix n n (\x -> maybe 0 identity (fromIntegral <$> M.lookup x idx))
-
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
-toIndex :: Map NgramId Index -> [(NgramId, NgramId, a)] -> Map (Index,Index) a
-toIndex ni ns = to ni ns
-
-fromIndex :: Map Index NgramId -> [(Index, Index, a)] -> Map (NgramId,NgramId) a
-fromIndex ni ns = to ni ns
--------------------------------------------------------------------------------
-to :: (Ord b, Ord k) => Map k b -> [(k, k, a)] -> Map (b, b) a
-to index ns = M.fromList $ map (\(a1,a2,c) -> ( ( (M.!) index a1
-                                                , (M.!) index a2
-                                                )
-                                              , c 
-                                              )
-                                 ) ns
-
--------------------------------------------------------------------------------
-cooc2set :: [(NgramId, NgramId, a)] -> Set NgramId
-cooc2set cs' = foldl' (\s (a1,a2,_) -> insert [a1,a2] s ) S.empty cs'
-  where
-    insert as s = foldl' (\s' a -> S.insert a s') s as
-
-
-set2indexes :: Set NgramId -> (Map Index NgramId, Map NgramId Index)
-set2indexes s = (M.fromList fromIndex', M.fromList toIndex')
-  where
-    s' = S.toList s
-    fromIndex' = zip [1..] s'
-    toIndex'   = zip  s'  [1..]
-
-
+-- | For tests only, to be removed
+-- m1 :: Matrix Double
+-- m1 = fromList 300 300 [1..]
+------------------------------------------------------------------------
 ------------------------------------------------------------------------
+data Axis = Col | Row
+------------------------------------------------------------------------
+-- | Matrix functions
+type AxisId = Int
+
 -- Data.Vector.Additions
 dropAt :: Int -> Vector a -> Vector a
 dropAt n v = debut <> (V.tail fin)
@@ -100,28 +53,6 @@ dropAt n v = debut <> (V.tail fin)
     debut = V.take n v
     fin   = V.drop n v
 
-------------------------------------------------------------------------
-data Axis = Col | Row
----- | Matrix Algebra
---data Algebra a = Point a | Vector a | Matrix a
---
---multiply :: Algebra a -> Matrix a -> Matrix a
---multiply (Point  a) = undefined
---multiply (Vector a) = undefined
---multiply (Matrix a) = undefined
---
---div :: Fractional a => Matrix a -> Matrix a
---div m = foldl' (\m c -> divCol c m) m [1.. (ncols m)]
---  where
---    divCol c m = mapCol (\_ x -> 1/x) c m
---
---divide :: Fractional a => Matrix a -> Matrix a -> Matrix a
---divide a b = a `multStd` (div b)
-
-------------------------------------------------------------------------
--- | Matrix functions
-type AxisId = Int
-
 total :: Num a => Matrix a -> a
 total m = V.sum $ V.map (\c -> V.sum (getCol c m)) (V.enumFromTo 1 (nOf Col m))
 
@@ -141,7 +72,3 @@ toListsWithIndex m = concat' $ zip [1..] $ map (\c -> zip [1..] c) $ toLists m
     concat' xs = L.concat $ map (\(x, ys) -> map (\(y, a) -> ((x,y), a)) ys ) xs
 
 
--- | For tests only, to be removed
-m1 :: Matrix Double
-m1 = fromList 300 300 [1..]
-
diff --git a/stack.yaml b/stack.yaml
index bc83197c..23363269 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -13,9 +13,8 @@ extra-deps:
   commit: 6f0595d2421005837d59151a8b26eee83ebb67b5
 - git: https://github.com/delanoe/servant-static-th.git
   commit: fff77e79fe94d563ab5cae2609b78c17b5c1f434
-  #- git: https://github.com/delanoe/accelerate.git
-  #commit: 007fd483a4410441fb5dd1b689a5f7dab66d27ad
 - accelerate-1.2.0.0
+- accelerate-io-1.2.0.0
 - aeson-1.2.4.0
 - aeson-lens-0.5.0.0
 - duckling-0.1.3.0
-- 
2.47.0


From 8d237e3cc3b4fa9718af65f45cbb063aa1301bdf Mon Sep 17 00:00:00 2001
From: Nicolas Pouillard <nicolas.pouillard@gmail.com>
Date: Wed, 30 May 2018 12:17:47 +0200
Subject: [PATCH 02/16] Fix mat2cooc

---
 src/Gargantext/Viz/Graph/Index.hs | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/Gargantext/Viz/Graph/Index.hs b/src/Gargantext/Viz/Graph/Index.hs
index 5fe0d3c7..9a378a20 100644
--- a/src/Gargantext/Viz/Graph/Index.hs
+++ b/src/Gargantext/Viz/Graph/Index.hs
@@ -27,7 +27,8 @@ module Gargantext.Viz.Graph.Index
   where
 
 import qualified Data.Array.Accelerate as A
-import qualified Data.Array.Accelerate.IO.Data.Vector.Unboxed as AU
+import qualified Data.Array.Accelerate.Interpreter as A
+import Data.Array.Accelerate (Matrix, Elt, Shape, (:.)(..), Z(..))
 
 import qualified Data.Vector.Unboxed as DVU
 import Data.List (concat)
@@ -69,21 +70,20 @@ map'' m = cooc2mat toI m
 
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
-cooc2mat :: Ord t => Map t Index -> Map (t, t) Int -> A.Matrix Int
-cooc2mat ti m = A.fromFunction shape (\(A.Z A.:. x A.:. y) -> lookup' x y)
+cooc2mat :: Ord t => Map t Index -> Map (t, t) Int -> Matrix Int
+cooc2mat ti m = A.fromFunction shape (\(Z :. x :. y) -> lookup' x y)
   where
     shape = (A.Z A.:. n A.:. n)
     n = M.size ti
     lookup' x y = maybe 0 identity (M.lookup (x,y) (toIndex ti m))
 
-mat2cooc :: A.Matrix Double -> Map (Index, Index) Double
-mat2cooc m = M.fromList $ concat                                          --  [((Int,Int), Double)]
-                        $ map (\(x,xs) -> map (\(y,ys) -> ((x,y),ys)) xs) -- [[((Int,Int), Double)]]
-                        $ zip ([1..] :: [Int])                            -- [(Int, [(Int, Double)]]
-                        $ map (zip ([1..] :: [Int]))                      -- [[(Int, Double)]]
-                        $ splitEvery n (A.toList m)                       -- [[Double]]
+-- TODO rename mat2map
+mat2cooc :: (Elt a, Shape (Z :. Index)) =>
+            A.Array (Z :. Index :. Index) a -> Map (Index, Index) a
+mat2cooc m = M.fromList . map f . A.toList . A.run . A.indexed $ A.use m
   where
-    A.Z A.:. _ A.:. n = A.arrayShape m
+    Z :. _ :. n = A.arrayShape m
+    f ((Z :. i :. j), x) = ((i, j), x)
 
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
-- 
2.47.0


From 30757cfc6b1b8587a28d244b04ad87176cdc21b8 Mon Sep 17 00:00:00 2001
From: Nicolas Pouillard <nicolas.pouillard@gmail.com>
Date: Wed, 30 May 2018 14:33:25 +0200
Subject: [PATCH 03/16] Add map2mat

---
 src/Gargantext/Pipeline.hs        |  4 ----
 src/Gargantext/Viz/Graph/Index.hs | 10 ++++++++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/Gargantext/Pipeline.hs b/src/Gargantext/Pipeline.hs
index ba74e8ee..58004b41 100644
--- a/src/Gargantext/Pipeline.hs
+++ b/src/Gargantext/Pipeline.hs
@@ -47,7 +47,3 @@ pipeline pth = do
   pure $ map' int2double myCooc
   -- Matrix -> Graph
 
-
-
-
-
diff --git a/src/Gargantext/Viz/Graph/Index.hs b/src/Gargantext/Viz/Graph/Index.hs
index 9a378a20..550a927a 100644
--- a/src/Gargantext/Viz/Graph/Index.hs
+++ b/src/Gargantext/Viz/Graph/Index.hs
@@ -31,7 +31,7 @@ import qualified Data.Array.Accelerate.Interpreter as A
 import Data.Array.Accelerate (Matrix, Elt, Shape, (:.)(..), Z(..))
 
 import qualified Data.Vector.Unboxed as DVU
-import Data.List (concat)
+import Data.Maybe (fromMaybe)
 
 import Data.Set (Set)
 import qualified Data.Set as S
@@ -75,7 +75,13 @@ cooc2mat ti m = A.fromFunction shape (\(Z :. x :. y) -> lookup' x y)
   where
     shape = (A.Z A.:. n A.:. n)
     n = M.size ti
-    lookup' x y = maybe 0 identity (M.lookup (x,y) (toIndex ti m))
+    idx = toIndex ti m -- it is important to make sure that toIndex is ran only once.
+    lookup' x y = fromMaybe 0 $ M.lookup (x,y) idx
+
+map2mat :: Elt a => a -> Int -> Map (Index, Index) a -> Matrix a
+map2mat def n m = A.fromFunction shape (\(Z :. x :. y) -> fromMaybe def $ M.lookup (x, y) m)
+  where
+    shape = (Z :. n :. n)
 
 -- TODO rename mat2map
 mat2cooc :: (Elt a, Shape (Z :. Index)) =>
-- 
2.47.0


From 9a6c89465488d8f0889da106dba79b6dcdf36a98 Mon Sep 17 00:00:00 2001
From: Nicolas Pouillard <nicolas.pouillard@gmail.com>
Date: Wed, 30 May 2018 14:35:30 +0200
Subject: [PATCH 04/16] New cooc2mat

---
 src/Gargantext/Viz/Graph/Index.hs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/Gargantext/Viz/Graph/Index.hs b/src/Gargantext/Viz/Graph/Index.hs
index 550a927a..806157cf 100644
--- a/src/Gargantext/Viz/Graph/Index.hs
+++ b/src/Gargantext/Viz/Graph/Index.hs
@@ -71,12 +71,10 @@ map'' m = cooc2mat toI m
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 cooc2mat :: Ord t => Map t Index -> Map (t, t) Int -> Matrix Int
-cooc2mat ti m = A.fromFunction shape (\(Z :. x :. y) -> lookup' x y)
+cooc2mat ti m = map2mat 0 n idx
   where
-    shape = (A.Z A.:. n A.:. n)
     n = M.size ti
     idx = toIndex ti m -- it is important to make sure that toIndex is ran only once.
-    lookup' x y = fromMaybe 0 $ M.lookup (x,y) idx
 
 map2mat :: Elt a => a -> Int -> Map (Index, Index) a -> Matrix a
 map2mat def n m = A.fromFunction shape (\(Z :. x :. y) -> fromMaybe def $ M.lookup (x, y) m)
-- 
2.47.0


From 4b81f9d11b3fda2832fb469ed81b2efa4b7eecae Mon Sep 17 00:00:00 2001
From: Nicolas Pouillard <nicolas.pouillard@gmail.com>
Date: Wed, 30 May 2018 14:56:41 +0200
Subject: [PATCH 05/16] small change in createIndexes

---
 src/Gargantext/Viz/Graph/Index.hs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Gargantext/Viz/Graph/Index.hs b/src/Gargantext/Viz/Graph/Index.hs
index 806157cf..1721af04 100644
--- a/src/Gargantext/Viz/Graph/Index.hs
+++ b/src/Gargantext/Viz/Graph/Index.hs
@@ -112,7 +112,8 @@ createIndexes = set2indexes . cooc2set
     set2indexes :: Ord t => Set t -> (Map t Index, Map Index t)
     set2indexes s = (M.fromList toIndex', M.fromList fromIndex')
       where
-        fromIndex' = zip [1..] (S.toList s)
-        toIndex'   = zip (S.toList s)  [1..]
+        fromIndex' = zip [0..] xs
+        toIndex'   = zip xs [0..]
+        xs         = S.toList s
 
 
-- 
2.47.0


From 0be01d72c7eca85a8b77c6c447a06d84d7732c74 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alexandre=20Delano=C3=AB?= <devel+git@delanoe.org>
Date: Thu, 31 May 2018 01:04:07 +0200
Subject: [PATCH 06/16] [Pipeline] ok until clustering.

---
 src/Gargantext/Pipeline.hs                    | 30 +++++++----------
 src/Gargantext/Text/Context.hs                | 32 ++++++++++++-------
 src/Gargantext/Text/Parsers/CSV.hs            | 12 +++----
 src/Gargantext/Text/Terms.hs                  |  4 +++
 src/Gargantext/Viz/Graph/Distances/Matrice.hs | 24 +++++++-------
 src/Gargantext/Viz/Graph/Index.hs             | 25 +++------------
 6 files changed, 58 insertions(+), 69 deletions(-)

diff --git a/src/Gargantext/Pipeline.hs b/src/Gargantext/Pipeline.hs
index 58004b41..fc341f3a 100644
--- a/src/Gargantext/Pipeline.hs
+++ b/src/Gargantext/Pipeline.hs
@@ -15,35 +15,29 @@ Portability : POSIX
 module Gargantext.Pipeline
   where
 
-import Data.Text (unpack)
-import qualified Data.Text as DT
-
 import Data.Text.IO (readFile)
 
 ----------------------------------------------
 ----------------------------------------------
-
 import Gargantext.Core
-import Gargantext.Core.Types
 import Gargantext.Prelude
 
-import Gargantext.Viz.Graph.Index (map', createIndexes)
-import Gargantext.Viz.Graph.Distances.Matrice (distributional, int2double)
+import Gargantext.Viz.Graph.Index (score)
+import Gargantext.Viz.Graph.Distances.Matrice (distributional)
 import Gargantext.Text.Metrics.Occurrences
 import Gargantext.Text.Terms
 import Gargantext.Text.Context
 
-import Data.Array.Accelerate as A
 
-pipeline pth = do
-  text     <- readFile pth
-  let contexts = splitBy Sentences 4 text
-  myterms <- mapM (terms Multi FR) contexts
-  -- todo filter stop words
+pipeline path = do
+  -- Text  <- IO Text <- FilePath
+  text     <- readFile path  
+  let contexts = splitBy (Sentences 3) text
+  myterms <- extractTerms Multi FR contexts
+  -- TODO    filter (\t -> not . elem t stopList) myterms
+  -- TODO    groupBy (Stem | GroupList)
   let myCooc = removeApax $ cooc myterms
-  --pure myCooc
-  -- Cooc map -> Matrix
-  --pure $ createIndexes myCooc
-  pure $ map' int2double myCooc
-  -- Matrix -> Graph
+  -- Cooc -> Matrix
+  pure $ score distributional myCooc
+  -- Matrix -> Clustering -> Graph -> JSON
 
diff --git a/src/Gargantext/Text/Context.hs b/src/Gargantext/Text/Context.hs
index 6935f24d..bd63baa5 100644
--- a/src/Gargantext/Text/Context.hs
+++ b/src/Gargantext/Text/Context.hs
@@ -14,26 +14,34 @@ Context of text management tool
 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 
-module Gargantext.Text.Context where
+module Gargantext.Text.Context
+  where
 
 import Data.Text (Text, pack, unpack, length)
 import Data.String (IsString)
 
-import Text.HTML.TagSoup
+import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
 import Gargantext.Text
 import Gargantext.Prelude hiding (length)
 
-data SplitBy = Paragraph | Sentences | Chars
-
 
-splitBy :: SplitBy -> Int -> Text -> [Text]
-splitBy Chars     n = map pack . chunkAlong n n . unpack
-splitBy Sentences n = map unsentences . chunkAlong n n  . sentences
-splitBy Paragraph _ = map removeTag   . filter isTagText . parseTags
+data SplitContext = Chars Int | Sentences Int | Paragraphs Int
+
+tag = parseTags
+-- | splitBy contexts of Chars or Sentences or Paragraphs
+-- >> splitBy (Chars 0) "abcde"
+-- ["a","b","c","d","e"]
+-- >> splitBy (Chars 1) "abcde"
+-- ["ab","bc","cd","de"]
+-- >> splitBy (Chars 2) "abcde"
+-- ["abc","bcd","cde"]
+splitBy :: SplitContext -> Text -> [Text]
+splitBy (Chars     n)  = map pack        . chunkAlong (n+1) 1 . unpack
+splitBy (Sentences n)  = map unsentences . chunkAlong (n+1) 1 . sentences
+splitBy (Paragraphs _) = map unTag       . filter isTagText   . tag
   where
-    removeTag :: IsString p => Tag p -> p
-    removeTag (TagText x) = x
-    removeTag (TagComment x) = x
-    removeTag _          = ""
+    unTag :: IsString p => Tag p -> p
+    unTag (TagText x) = x
+    unTag _           = ""
 
 
diff --git a/src/Gargantext/Text/Parsers/CSV.hs b/src/Gargantext/Text/Parsers/CSV.hs
index d3d0c39f..0ae33a29 100644
--- a/src/Gargantext/Text/Parsers/CSV.hs
+++ b/src/Gargantext/Text/Parsers/CSV.hs
@@ -50,13 +50,13 @@ data Doc = Doc
     deriving (Show)
 ---------------------------------------------------------------
 toDocs :: Vector CsvDoc -> [Doc]
-toDocs v = V.toList 
+toDocs v = V.toList
          $ V.zipWith (\nId (CsvDoc t s py pm pd abst auth)
                        -> Doc nId t s py pm pd abst auth )
                        (V.enumFromN 1 (V.length v'')) v''
           where
             v'' = V.foldl (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps
-            seps= (V.fromList [Paragraph, Sentences, Chars])
+            seps= (V.fromList [Paragraphs 1, Sentences 3, Chars 3])
 
 ---------------------------------------------------------------
 fromDocs :: Vector Doc -> Vector CsvDoc
@@ -69,7 +69,7 @@ fromDocs docs = V.map fromDocs' docs
 -- TODO adapt the size of the paragraph according to the corpus average
 
 
-splitDoc :: Mean -> SplitBy -> CsvDoc -> Vector CsvDoc
+splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc
 splitDoc m splt doc = let docSize = (length $ c_abstract doc) in
                           if docSize > 1000
                             then
@@ -82,15 +82,15 @@ splitDoc m splt doc = let docSize = (length $ c_abstract doc) in
                               V.fromList [doc]
 
 
-splitDoc' :: SplitBy -> CsvDoc -> Vector CsvDoc
-splitDoc' splt (CsvDoc t s py pm pd abst auth) = V.fromList $ [firstDoc] <> nextDocs
+splitDoc' :: SplitContext -> CsvDoc -> Vector CsvDoc
+splitDoc' contextSize (CsvDoc t s py pm pd abst auth) = V.fromList $ [firstDoc] <> nextDocs
     where
       firstDoc = CsvDoc t s py pm pd firstAbstract auth
       firstAbstract = head' abstracts
       
       nextDocs = map (\txt -> CsvDoc (head' $ sentences txt) s py pm pd (unsentences $ tail' $ sentences txt) auth) (tail' abstracts)
       
-      abstracts    = (splitBy splt 20) abst
+      abstracts    = (splitBy $ contextSize) abst
       head' x = maybe ""   identity (head x)
       tail' x = maybe [""] identity (tailMay x)
 
diff --git a/src/Gargantext/Text/Terms.hs b/src/Gargantext/Text/Terms.hs
index f19496f2..112de26d 100644
--- a/src/Gargantext/Text/Terms.hs
+++ b/src/Gargantext/Text/Terms.hs
@@ -34,6 +34,7 @@ module Gargantext.Text.Terms
   where
 
 import Data.Text (Text)
+import Data.Traversable
 
 import Gargantext.Prelude
 import Gargantext.Core
@@ -46,6 +47,9 @@ data TermType = Mono | Multi
 -- remove Stop Words
 -- map (filter (\t -> not . elem t)) $ 
 ------------------------------------------------------------------------
+extractTerms :: Traversable t => TermType -> Lang -> t Text -> IO (t [Terms])
+extractTerms termType lang = mapM (terms termType lang)
+------------------------------------------------------------------------
 terms :: TermType -> Lang -> Text -> IO [Terms]
 terms Mono  lang txt = pure $ monoterms' lang txt
 terms Multi lang txt = multiterms lang txt
diff --git a/src/Gargantext/Viz/Graph/Distances/Matrice.hs b/src/Gargantext/Viz/Graph/Distances/Matrice.hs
index 546dedc8..f5b26298 100644
--- a/src/Gargantext/Viz/Graph/Distances/Matrice.hs
+++ b/src/Gargantext/Viz/Graph/Distances/Matrice.hs
@@ -36,10 +36,8 @@ Implementation use Accelerate library :
 module Gargantext.Viz.Graph.Distances.Matrice
   where
 
---import Data.Array.Accelerate.Data.Bits
-import Data.Array.Accelerate.Interpreter (run)
-
 import Data.Array.Accelerate
+import Data.Array.Accelerate.Interpreter (run)
 import Data.Array.Accelerate.Smart
 import Data.Array.Accelerate.Type
 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
@@ -94,14 +92,7 @@ type SpecificityGenericity = Double
 conditional :: Matrix Double -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
 conditional m = (run $ ie (use m), run $ sg (use m))
   where
-    r :: Rank
-    r = rank' m
 
-    xs :: Matrix' Double -> Matrix' Double
-    xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
-    ys :: Acc (Matrix Double) -> Acc (Matrix Double)
-    ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
-    
     ie :: Matrix' Double -> Matrix' Double
     ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
     sg :: Acc (Matrix Double) -> Acc (Matrix Double)
@@ -109,7 +100,14 @@ conditional m = (run $ ie (use m), run $ sg (use m))
 
     n :: Exp Double
     n = P.fromIntegral r
-    
+
+    r :: Rank
+    r = rank' m
+
+    xs :: Matrix' Double -> Matrix' Double
+    xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
+    ys :: Acc (Matrix Double) -> Acc (Matrix Double)
+    ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
 
 -- filter with threshold
 -----------------------------------------------------------------------
@@ -121,7 +119,9 @@ distributional m = run $ filter $ ri (map fromIntegral $ use m)
   where
     n    = rank' m
     
-    miniMax m = map (\x -> ifThenElse (x > (the $ minimum $ maximum m)) x 0) m
+    miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
+      where
+        miniMax' = (the $ minimum $ maximum m)
     
     filter  m = zipWith (\a b -> max a b) m (transpose m)
     
diff --git a/src/Gargantext/Viz/Graph/Index.hs b/src/Gargantext/Viz/Graph/Index.hs
index 1721af04..3ccf4a46 100644
--- a/src/Gargantext/Viz/Graph/Index.hs
+++ b/src/Gargantext/Viz/Graph/Index.hs
@@ -43,28 +43,12 @@ import Gargantext.Prelude
 
 type Index    = Int
 
-
 -------------------------------------------------------------------------------
-{-
-map'' :: (Ord t) => (A.Matrix Int -> A.Matrix Double)
-                -> Map (t, t) Int
-                -> Map (t, t) Double
-map'' f m = back . f' . from m
-  where
-    from (fs, m') = unzip $ M.toAscList m
-    f'  = f $ A.fromList shape m'
-    shape = (A.Z A.:. n A.:. n)
-    back = M.fromAscList . zip fs . A.toList
--}
 -------------------------------------------------------------------------------
-map' :: (Ord t) => (A.Matrix Int -> A.Matrix Double)
+score :: (Ord t) => (A.Matrix Int -> A.Matrix Double)
                 -> Map (t, t) Int
                 -> Map (t, t) Double
-map' f m = fromIndex fromI . mat2cooc . f $ cooc2mat toI m
-  where
-    (toI, fromI) = createIndexes m
-
-map'' m = cooc2mat toI m
+score f m = fromIndex fromI . mat2map . f $ cooc2mat toI m
   where
     (toI, fromI) = createIndexes m
 
@@ -81,10 +65,9 @@ map2mat def n m = A.fromFunction shape (\(Z :. x :. y) -> fromMaybe def $ M.look
   where
     shape = (Z :. n :. n)
 
--- TODO rename mat2map
-mat2cooc :: (Elt a, Shape (Z :. Index)) =>
+mat2map :: (Elt a, Shape (Z :. Index)) =>
             A.Array (Z :. Index :. Index) a -> Map (Index, Index) a
-mat2cooc m = M.fromList . map f . A.toList . A.run . A.indexed $ A.use m
+mat2map m = M.fromList . map f . A.toList . A.run . A.indexed $ A.use m
   where
     Z :. _ :. n = A.arrayShape m
     f ((Z :. i :. j), x) = ((i, j), x)
-- 
2.47.0


From b76fc48993fdf631b3725468745ac8baccc674ee Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alexandre=20Delano=C3=AB?= <devel+git@delanoe.org>
Date: Thu, 31 May 2018 07:27:00 +0200
Subject: [PATCH 07/16] [CLEAN] imports for pipeline.

---
 src/Gargantext/Pipeline.hs | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/Gargantext/Pipeline.hs b/src/Gargantext/Pipeline.hs
index fc341f3a..ba3f4230 100644
--- a/src/Gargantext/Pipeline.hs
+++ b/src/Gargantext/Pipeline.hs
@@ -16,17 +16,15 @@ module Gargantext.Pipeline
   where
 
 import Data.Text.IO (readFile)
-
-----------------------------------------------
 ----------------------------------------------
-import Gargantext.Core
+import Gargantext.Core (Lang(FR))
 import Gargantext.Prelude
 
 import Gargantext.Viz.Graph.Index (score)
 import Gargantext.Viz.Graph.Distances.Matrice (distributional)
-import Gargantext.Text.Metrics.Occurrences
-import Gargantext.Text.Terms
-import Gargantext.Text.Context
+import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
+import Gargantext.Text.Terms (TermType(Multi), extractTerms)
+import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
 
 
 pipeline path = do
-- 
2.47.0


From 47ed713fc5344a2a8aed996e5bdf4320b2d40096 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alexandre=20Delano=C3=AB?= <devel+git@delanoe.org>
Date: Thu, 31 May 2018 10:23:15 +0200
Subject: [PATCH 08/16] [PIPELINE] adding clustering louvain.

---
 src/Gargantext/Pipeline.hs | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/Gargantext/Pipeline.hs b/src/Gargantext/Pipeline.hs
index ba3f4230..b19738f9 100644
--- a/src/Gargantext/Pipeline.hs
+++ b/src/Gargantext/Pipeline.hs
@@ -16,26 +16,35 @@ module Gargantext.Pipeline
   where
 
 import Data.Text.IO (readFile)
+import qualified Data.Map.Strict as M
 ----------------------------------------------
 import Gargantext.Core (Lang(FR))
 import Gargantext.Prelude
 
-import Gargantext.Viz.Graph.Index (score)
+import Gargantext.Viz.Graph.Index (score, createIndexes, toIndex)
 import Gargantext.Viz.Graph.Distances.Matrice (distributional)
 import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
-import Gargantext.Text.Terms (TermType(Multi), extractTerms)
+import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
 
+import Data.Graph.Clustering.Louvain (bestpartition)
+import Data.Graph.Clustering.Louvain.Utils (map2graph)
 
 pipeline path = do
   -- Text  <- IO Text <- FilePath
   text     <- readFile path  
   let contexts = splitBy (Sentences 3) text
   myterms <- extractTerms Multi FR contexts
+  
   -- TODO    filter (\t -> not . elem t stopList) myterms
   -- TODO    groupBy (Stem | GroupList)
+  
   let myCooc = removeApax $ cooc myterms
+  
   -- Cooc -> Matrix
-  pure $ score distributional myCooc
+  let theScores = M.filter (/=0) $ score distributional myCooc
+  let (ti, _) = createIndexes theScores
+  
   -- Matrix -> Clustering -> Graph -> JSON
+  pure $ bestpartition False $ map2graph $ toIndex ti theScores
 
-- 
2.47.0


From a5bcf8db2c1ef4b702b09af7528dbb26af737182 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alexandre=20Delano=C3=AB?= <devel+git@delanoe.org>
Date: Thu, 31 May 2018 10:43:57 +0200
Subject: [PATCH 09/16] [RENAME] name and newtypes for createIndices.

---
 src/Gargantext/Pipeline.hs        |  4 ++--
 src/Gargantext/Viz/Graph/Index.hs | 29 ++++++++++++++++++++---------
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/src/Gargantext/Pipeline.hs b/src/Gargantext/Pipeline.hs
index b19738f9..ba20344c 100644
--- a/src/Gargantext/Pipeline.hs
+++ b/src/Gargantext/Pipeline.hs
@@ -21,7 +21,7 @@ import qualified Data.Map.Strict as M
 import Gargantext.Core (Lang(FR))
 import Gargantext.Prelude
 
-import Gargantext.Viz.Graph.Index (score, createIndexes, toIndex)
+import Gargantext.Viz.Graph.Index (score, createIndices, toIndex)
 import Gargantext.Viz.Graph.Distances.Matrice (distributional)
 import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
@@ -43,7 +43,7 @@ pipeline path = do
   
   -- Cooc -> Matrix
   let theScores = M.filter (/=0) $ score distributional myCooc
-  let (ti, _) = createIndexes theScores
+  let (ti, _) = createIndices theScores
   
   -- Matrix -> Clustering -> Graph -> JSON
   pure $ bestpartition False $ map2graph $ toIndex ti theScores
diff --git a/src/Gargantext/Viz/Graph/Index.hs b/src/Gargantext/Viz/Graph/Index.hs
index 3ccf4a46..fd693a52 100644
--- a/src/Gargantext/Viz/Graph/Index.hs
+++ b/src/Gargantext/Viz/Graph/Index.hs
@@ -39,6 +39,8 @@ import qualified Data.Set as S
 import Data.Map (Map)
 import qualified Data.Map.Strict    as M
 
+import Data.Vector (Vector)
+
 import Gargantext.Prelude
 
 type Index    = Int
@@ -50,7 +52,7 @@ score :: (Ord t) => (A.Matrix Int -> A.Matrix Double)
                 -> Map (t, t) Double
 score f m = fromIndex fromI . mat2map . f $ cooc2mat toI m
   where
-    (toI, fromI) = createIndexes m
+    (toI, fromI) = createIndices m
 
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
@@ -79,21 +81,30 @@ toIndex ni ns = indexConversion ni ns
 
 fromIndex :: Ord t => Map Index t -> Map (Index, Index) a -> Map (t,t) a
 fromIndex ni ns = indexConversion ni ns
----------------------------------------------------------------------------------
+
 indexConversion :: (Ord b, Ord k) => Map k b -> Map (k,k) a -> Map (b, b) a
 indexConversion index ms = M.fromList $ map (\((k1,k2),c) -> ( ((M.!) index k1, (M.!) index k2), c)) (M.toList ms)
+---------------------------------------------------------------------------------
+
 -------------------------------------------------------------------------------
--------------------------------------------------------------------------------
-createIndexes :: Ord t => Map (t, t) b -> (Map t Index, Map Index t)
-createIndexes = set2indexes . cooc2set
+-- TODO
+fromIndex' :: Ord t => Vector t -> Map (Index, Index) a -> Map (t,t) a
+fromIndex' vi ns = undefined
+
+-- TODO
+createIndices' :: Ord t => Map (t, t) b -> (Map t Index, Vector t)
+createIndices' = undefined
+
+createIndices :: Ord t => Map (t, t) b -> (Map t Index, Map Index t)
+createIndices = set2indices . map2set
   where
-    cooc2set :: Ord t => Map (t, t) a -> Set t
-    cooc2set cs' = foldl' (\s ((t1,t2),_) -> insert [t1,t2] s ) S.empty (M.toList cs')
+    map2set :: Ord t => Map (t, t) a -> Set t
+    map2set cs' = foldl' (\s ((t1,t2),_) -> insert [t1,t2] s ) S.empty (M.toList cs')
       where
         insert as s = foldl' (\s' t -> S.insert t s') s as
 
-    set2indexes :: Ord t => Set t -> (Map t Index, Map Index t)
-    set2indexes s = (M.fromList toIndex', M.fromList fromIndex')
+    set2indices :: Ord t => Set t -> (Map t Index, Map Index t)
+    set2indices s = (M.fromList toIndex', M.fromList fromIndex')
       where
         fromIndex' = zip [0..] xs
         toIndex'   = zip xs [0..]
-- 
2.47.0


From 00344aaf2b97c9b4105f46574703633d5b097b54 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alexandre=20Delano=C3=AB?= <devel+git@delanoe.org>
Date: Thu, 31 May 2018 10:49:51 +0200
Subject: [PATCH 10/16] [TERMS] adding type MonoMulti.

---
 src/Gargantext/Text/Terms.hs | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/Gargantext/Text/Terms.hs b/src/Gargantext/Text/Terms.hs
index 112de26d..4f17555f 100644
--- a/src/Gargantext/Text/Terms.hs
+++ b/src/Gargantext/Text/Terms.hs
@@ -42,16 +42,23 @@ import Gargantext.Core.Types
 import Gargantext.Text.Terms.Multi (multiterms)
 import Gargantext.Text.Terms.Mono  (monoterms')
 
-data TermType = Mono | Multi
+data TermType = Mono | Multi | MonoMulti
 
 -- remove Stop Words
 -- map (filter (\t -> not . elem t)) $ 
 ------------------------------------------------------------------------
+-- | Sugar to extract terms from text (hiddeng mapM from end user).
 extractTerms :: Traversable t => TermType -> Lang -> t Text -> IO (t [Terms])
 extractTerms termType lang = mapM (terms termType lang)
 ------------------------------------------------------------------------
+-- | Terms from Text
+-- Mono : mono terms
+-- Multi : multi terms
+-- MonoMulti : mono and multi
+-- TODO : multi terms should exclude mono (intersection is not empty yet)
 terms :: TermType -> Lang -> Text -> IO [Terms]
-terms Mono  lang txt = pure $ monoterms' lang txt
-terms Multi lang txt = multiterms lang txt
+terms Mono  lang txt     = pure $ monoterms' lang txt
+terms Multi lang txt     = multiterms lang txt
+terms MonoMulti lang txt = terms Multi lang txt
 ------------------------------------------------------------------------
 
-- 
2.47.0


From 09cf291732121325305090854d465e848b16a47a Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alexandre=20Delano=C3=AB?= <devel+git@delanoe.org>
Date: Thu, 31 May 2018 22:01:53 +0200
Subject: [PATCH 11/16] [Pipeline] clustering with C++ Louvain bindings, ok.

---
 src/Gargantext/Pipeline.hs                    | 18 +++++++--------
 src/Gargantext/Viz/Graph.hs                   |  5 ----
 src/Gargantext/Viz/Graph/Distances/Matrice.hs | 23 +++++++++++++------
 3 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/src/Gargantext/Pipeline.hs b/src/Gargantext/Pipeline.hs
index ba20344c..a245f972 100644
--- a/src/Gargantext/Pipeline.hs
+++ b/src/Gargantext/Pipeline.hs
@@ -22,17 +22,16 @@ import Gargantext.Core (Lang(FR))
 import Gargantext.Prelude
 
 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex)
-import Gargantext.Viz.Graph.Distances.Matrice (distributional)
+import Gargantext.Viz.Graph.Distances.Matrice (conditional)
 import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
 
-import Data.Graph.Clustering.Louvain (bestpartition)
-import Data.Graph.Clustering.Louvain.Utils (map2graph)
+import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
 
 pipeline path = do
   -- Text  <- IO Text <- FilePath
-  text     <- readFile path  
+  text     <- readFile path
   let contexts = splitBy (Sentences 3) text
   myterms <- extractTerms Multi FR contexts
   
@@ -40,11 +39,12 @@ pipeline path = do
   -- TODO    groupBy (Stem | GroupList)
   
   let myCooc = removeApax $ cooc myterms
-  
   -- Cooc -> Matrix
-  let theScores = M.filter (/=0) $ score distributional myCooc
+  let theScores = M.take 350 $ M.filter (>0) $ score conditional myCooc
   let (ti, _) = createIndices theScores
-  
-  -- Matrix -> Clustering -> Graph -> JSON
-  pure $ bestpartition False $ map2graph $ toIndex ti theScores
+--
+----  -- Matrix -> Clustering -> Graph -> JSON
+----  pure $ bestpartition False $ map2graph $ toIndex ti theScores
+  partitions <- cLouvain $ toIndex ti theScores
+  pure partitions
 
diff --git a/src/Gargantext/Viz/Graph.hs b/src/Gargantext/Viz/Graph.hs
index c1520ed0..332d6551 100644
--- a/src/Gargantext/Viz/Graph.hs
+++ b/src/Gargantext/Viz/Graph.hs
@@ -60,8 +60,3 @@ $(deriveJSON (unPrefix "g_") ''Graph)
 
 
-
-
-
-
-
diff --git a/src/Gargantext/Viz/Graph/Distances/Matrice.hs b/src/Gargantext/Viz/Graph/Distances/Matrice.hs
index f5b26298..a7ed1c83 100644
--- a/src/Gargantext/Viz/Graph/Distances/Matrice.hs
+++ b/src/Gargantext/Viz/Graph/Distances/Matrice.hs
@@ -89,8 +89,21 @@ type Matrix' a = Acc (Matrix a)
 type InclusionExclusion    = Double
 type SpecificityGenericity = Double
 
-conditional :: Matrix Double -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
-conditional m = (run $ ie (use m), run $ sg (use m))
+
+miniMax :: Matrix' Double -> Matrix' Double
+miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
+  where
+    miniMax' = (the $ minimum $ maximum m)
+
+conditional :: Matrix Int -> Matrix Double
+conditional m = run (miniMax $ proba r $ map fromIntegral $ use m)
+  where
+    r :: Rank
+    r = rank' m
+
+
+conditional' :: Matrix Double -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
+conditional' m = (run $ ie (use m), run $ sg (use m))
   where
 
     ie :: Matrix' Double -> Matrix' Double
@@ -115,14 +128,10 @@ conditional m = (run $ ie (use m), run $ sg (use m))
 -- | Distributional Distance
 
 distributional :: Matrix Int -> Matrix Double
-distributional m = run $ filter $ ri (map fromIntegral $ use m)
+distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
   where
     n    = rank' m
     
-    miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
-      where
-        miniMax' = (the $ minimum $ maximum m)
-    
     filter  m = zipWith (\a b -> max a b) m (transpose m)
     
     ri mat = zipWith (/) mat1 mat2
-- 
2.47.0


From c34120e3f0b2f47f1225f049a01337c2a5e7b48b Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alexandre=20Delano=C3=AB?= <devel+git@delanoe.org>
Date: Fri, 1 Jun 2018 17:05:04 +0200
Subject: [PATCH 12/16] [FIX] fix cooc behavior.

---
 src/Gargantext/Pipeline.hs                    | 48 +++++++++++---
 src/Gargantext/Prelude.hs                     |  2 -
 src/Gargantext/Text/Metrics.hs                | 65 +++++++++++++++++--
 src/Gargantext/Text/Metrics/Occurrences.hs    | 24 +++----
 src/Gargantext/Text/Terms/Multi/PosTagging.hs |  6 +-
 src/Gargantext/Viz/Graph/Distances/Matrice.hs | 18 ++++-
 6 files changed, 132 insertions(+), 31 deletions(-)

diff --git a/src/Gargantext/Pipeline.hs b/src/Gargantext/Pipeline.hs
index a245f972..1c5a7d72 100644
--- a/src/Gargantext/Pipeline.hs
+++ b/src/Gargantext/Pipeline.hs
@@ -16,19 +16,44 @@ module Gargantext.Pipeline
   where
 
 import Data.Text.IO (readFile)
+
+import Control.Arrow ((***))
+import Data.Map.Strict (Map)
 import qualified Data.Map.Strict as M
+import qualified Data.Set        as S
+import qualified Data.List       as L
+import Data.Tuple.Extra (both)
 ----------------------------------------------
 import Gargantext.Core (Lang(FR))
 import Gargantext.Prelude
 
-import Gargantext.Viz.Graph.Index (score, createIndices, toIndex)
-import Gargantext.Viz.Graph.Distances.Matrice (conditional)
+import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
+import Gargantext.Viz.Graph.Distances.Matrice (incExcSpeGen, conditional)
+import Gargantext.Viz.Graph.Index (Index)
 import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
 
 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
 
+--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
+--filterCooc m = 
+---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
+----(ti, fi)  = createIndices m
+-- . fromIndex fi $ filterMat $ cooc2mat ti m
+
+
+
+import Data.Array.Accelerate (Matrix)
+filterMat :: Matrix Int -> [(Index, Index)]
+filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
+  where
+    (incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (incExcSpeGen m)
+    n = nIe + nSg
+    nIe = 30
+    nSg = 70
+
+
 pipeline path = do
   -- Text  <- IO Text <- FilePath
   text     <- readFile path
@@ -39,12 +64,17 @@ pipeline path = do
   -- TODO    groupBy (Stem | GroupList)
   
   let myCooc = removeApax $ cooc myterms
+  let (ti, fi) = createIndices myCooc
+  pure ti
   -- Cooc -> Matrix
-  let theScores = M.take 350 $ M.filter (>0) $ score conditional myCooc
-  let (ti, _) = createIndices theScores
---
-----  -- Matrix -> Clustering -> Graph -> JSON
-----  pure $ bestpartition False $ map2graph $ toIndex ti theScores
-  partitions <- cLouvain $ toIndex ti theScores
-  pure partitions
+  
+--  -- filter by spec/gen (dynmaic programming)
+--  let theScores = M.filter (>0) $ score conditional myCoocFiltered
+----
+------  -- Matrix -> Clustering
+------  pure $ bestpartition False $ map2graph $ toIndex ti theScores
+--  partitions <- cLouvain theScores
+--  pure partitions
+---- | Building : -> Graph -> JSON
+
 
diff --git a/src/Gargantext/Prelude.hs b/src/Gargantext/Prelude.hs
index 4cad2f18..6b12fdd6 100644
--- a/src/Gargantext/Prelude.hs
+++ b/src/Gargantext/Prelude.hs
@@ -237,5 +237,3 @@ unMaybe = map fromJust . L.filter isJust
 -- maximumWith
 maximumWith f = L.maximumBy (\x y -> compare (f x) (f y))
 
-
-
diff --git a/src/Gargantext/Text/Metrics.hs b/src/Gargantext/Text/Metrics.hs
index 4904a5cf..c37e5ac8 100644
--- a/src/Gargantext/Text/Metrics.hs
+++ b/src/Gargantext/Text/Metrics.hs
@@ -14,12 +14,69 @@ Mainly reexport functions in @Data.Text.Metrics@
 
 module Gargantext.Text.Metrics where
 
---import Data.Text (Text)
+import Data.Text (Text, pack)
+import Data.List (concat)
+
 --import GHC.Real (Ratio)
 --import qualified Data.Text.Metrics as DTM
---
---import Gargantext.Prelude
---
+
+import Gargantext.Prelude
+
+import Gargantext.Text.Metrics.Occurrences (occurrences, cooc)
+import Gargantext.Text.Terms (TermType(Multi), terms)
+import Gargantext.Core (Lang(EN))
+import Gargantext.Core.Types (Terms(..))
+import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
+
 --noApax :: Ord a => Map a Occ -> Map a Occ
 --noApax m = M.filter (>1) m
 
+
+metrics_text :: Text
+metrics_text = "A table is an object. A glas is an object. The glas is on the table. The spoon is an object. The spoon is on the table."
+
+-- | Sentences 
+metrics_sentences :: [Text]
+metrics_sentences = [ "A table is an object."
+                    , "A glas is an object."
+                    , "The glas is on the table."
+                    , "The spoon is an object."
+                    , "The spoon is on the table."
+                    ]
+
+
+metrics_sentences_Test = splitBy (Sentences 0) metrics_text == metrics_sentences
+
+-- | Terms reordered to visually check occurrences
+metrics_terms :: [[[Text]]]
+metrics_terms = [[["table"],["object"]                  ]
+                ,[          ["object"],["glas"]         ]
+                ,[["table"],           ["glas"]         ]
+                ,[          ["object"],        ["spoon"]]
+                ,[["table"],                   ["spoon"]]
+                ]
+--metrics_terms_Test = (mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text) == metrics_terms
+
+-- | Occurrences
+{-
+fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
+         , (fromList ["object"],fromList [(["object"], 3 )])
+         , (fromList ["glas"]  ,fromList [(["glas"]  , 2 )])
+         , (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
+-}
+metrics_occ = occurrences <$> concat <$> (mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text)
+
+{- 
+-- fromList [((["glas"],["object"]),6)
+            ,((["glas"],["spoon"]),4)
+            ,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
+
+-}
+metrics_cooc = cooc <$> (mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text)
+
+metrics_cooc' = (mapM (terms Multi EN) $ splitBy (Sentences 0) "The table object. The table object.")
+
+
+
+
+
diff --git a/src/Gargantext/Text/Metrics/Occurrences.hs b/src/Gargantext/Text/Metrics/Occurrences.hs
index 00a0f429..dbfc692f 100644
--- a/src/Gargantext/Text/Metrics/Occurrences.hs
+++ b/src/Gargantext/Text/Metrics/Occurrences.hs
@@ -82,9 +82,9 @@ removeApax = DMS.filter (> 1)
 
 cooc :: [[Terms]] -> Map (Label, Label) Int
 cooc tss =
-  mapKeys (delta $ labelPolicy terms_occs) $ cooc' (map (Set.fromList . map _terms_stem) tss)
+  mapKeys (delta $ labelPolicy terms_occs) $ coocOn _terms_stem tss
   where
-    terms_occs = occurrences (List.concat tss)
+    terms_occs = occurrencesOn _terms_stem (List.concat tss)
     delta f = f *** f
 
 
@@ -93,24 +93,26 @@ labelPolicy m g =  case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList
                      Just label -> label
                      Nothing    -> panic $ "Label of Grouped not found: " <> (pack $ show g)
 
-cooc' :: Ord b => [Set b] -> Map (b, b) Coocs
-cooc' tss = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
+coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Coocs
+coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
+
+coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Coocs
+coocOn' f ts = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
   where
+      ts' = List.nub $ map f ts
       xs = [ ((x, y), 1)
-           | xs <- tss
-           , ys <- tss
-           , x <- Set.toList xs
-           , y <- Set.toList ys
+           | x <- ts'
+           , y <- ts'
            , x < y
            ]
 
 
 -- | Compute the grouped occurrences (occ)
 occurrences :: [Terms] -> Map Grouped (Map Terms Int)
-occurrences = occurrences' _terms_stem
+occurrences = occurrencesOn _terms_stem
 
-occurrences' :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
-occurrences' f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty
+occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
+occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty
 
 -- TODO add groups and filter stops
 sumOcc :: Ord a => [Occ a] -> Occ a
diff --git a/src/Gargantext/Text/Terms/Multi/PosTagging.hs b/src/Gargantext/Text/Terms/Multi/PosTagging.hs
index 3622063f..b6dceec7 100644
--- a/src/Gargantext/Text/Terms/Multi/PosTagging.hs
+++ b/src/Gargantext/Text/Terms/Multi/PosTagging.hs
@@ -96,10 +96,10 @@ data Properties = Properties { _propertiesAnnotators  :: Text
 
 $(deriveJSON (unPrefix "_properties") ''Properties)
 
-data Sentences = Sentences { _sentences :: [Sentence]}
+data PosSentences = PosSentences { _sentences :: [Sentence]}
   deriving (Show, Generic)
 
-$(deriveJSON (unPrefix "_") ''Sentences)
+$(deriveJSON (unPrefix "_") ''PosSentences)
 
 
 -- request = 
@@ -134,7 +134,7 @@ corenlpRaw lang txt = do
   pure (getResponseBody response)
 
 
-corenlp :: Lang -> Text -> IO Sentences
+corenlp :: Lang -> Text -> IO PosSentences
 corenlp lang txt = do
   response <- corenlp' lang txt
   pure (getResponseBody response)
diff --git a/src/Gargantext/Viz/Graph/Distances/Matrice.hs b/src/Gargantext/Viz/Graph/Distances/Matrice.hs
index a7ed1c83..1aa4db24 100644
--- a/src/Gargantext/Viz/Graph/Distances/Matrice.hs
+++ b/src/Gargantext/Viz/Graph/Distances/Matrice.hs
@@ -102,8 +102,22 @@ conditional m = run (miniMax $ proba r $ map fromIntegral $ use m)
     r = rank' m
 
 
-conditional' :: Matrix Double -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
-conditional' m = (run $ ie (use m), run $ sg (use m))
+{-
+Metric Specificity and genericty: select terms
+   Compute genericity/specificity:
+        P(j|i) = N(ij) / N(ii)
+        P(i|j) = N(ij) / N(jj)
+
+        Gen(i)  = Mean{j} P(j_k|i)
+        Spec(i) = Mean{j} P(i|j_k)
+
+        Gen-clusion(i)  = (Spec(i) + Gen(i)) / 2
+        Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
+
+-}
+
+incExcSpeGen :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
+incExcSpeGen m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
   where
 
     ie :: Matrix' Double -> Matrix' Double
-- 
2.47.0


From 6551bf9017a3141cce92604e799d579044063222 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alexandre=20Delano=C3=AB?= <devel+git@delanoe.org>
Date: Sat, 2 Jun 2018 22:50:52 +0200
Subject: [PATCH 13/16] [COUNT] renaming file and generic function.

---
 src/Gargantext/Pipeline.hs                    |  2 +-
 src/Gargantext/Text.hs                        |  2 +-
 src/Gargantext/Text/Metrics.hs                | 43 +++++++++++--------
 .../Text/Metrics/{Occurrences.hs => Count.hs} | 34 +++++++++------
 src/Gargantext/Viz/Graph/Distances/Matrice.hs |  2 +-
 5 files changed, 47 insertions(+), 36 deletions(-)
 rename src/Gargantext/Text/Metrics/{Occurrences.hs => Count.hs} (86%)

diff --git a/src/Gargantext/Pipeline.hs b/src/Gargantext/Pipeline.hs
index 1c5a7d72..c12bbace 100644
--- a/src/Gargantext/Pipeline.hs
+++ b/src/Gargantext/Pipeline.hs
@@ -30,7 +30,7 @@ import Gargantext.Prelude
 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
 import Gargantext.Viz.Graph.Distances.Matrice (incExcSpeGen, conditional)
 import Gargantext.Viz.Graph.Index (Index)
-import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
+import Gargantext.Text.Metrics.Count (cooc, removeApax)
 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
 
diff --git a/src/Gargantext/Text.hs b/src/Gargantext/Text.hs
index 11ca201f..c31aedc0 100644
--- a/src/Gargantext/Text.hs
+++ b/src/Gargantext/Text.hs
@@ -27,7 +27,7 @@ import NLP.FullStop (segment)
 -----------------------------------------------------------------
 import Gargantext.Core
 import Gargantext.Core.Types
-import Gargantext.Text.Metrics.Occurrences (Occ, occurrences, cooc)
+import Gargantext.Text.Metrics.Count (Occ, occurrences, cooc)
 import Gargantext.Prelude hiding (filter)
 -----------------------------------------------------------------
 
diff --git a/src/Gargantext/Text/Metrics.hs b/src/Gargantext/Text/Metrics.hs
index c37e5ac8..925dea93 100644
--- a/src/Gargantext/Text/Metrics.hs
+++ b/src/Gargantext/Text/Metrics.hs
@@ -15,6 +15,7 @@ Mainly reexport functions in @Data.Text.Metrics@
 module Gargantext.Text.Metrics where
 
 import Data.Text (Text, pack)
+import qualified Data.Text as T
 import Data.List (concat)
 
 --import GHC.Real (Ratio)
@@ -22,7 +23,7 @@ import Data.List (concat)
 
 import Gargantext.Prelude
 
-import Gargantext.Text.Metrics.Occurrences (occurrences, cooc)
+import Gargantext.Text.Metrics.Count (occurrences, cooc)
 import Gargantext.Text.Terms (TermType(Multi), terms)
 import Gargantext.Core (Lang(EN))
 import Gargantext.Core.Types (Terms(..))
@@ -33,29 +34,35 @@ import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
 
 
 metrics_text :: Text
-metrics_text = "A table is an object. A glas is an object. The glas is on the table. The spoon is an object. The spoon is on the table."
+metrics_text = T.concat ["A table is an object."
+                        ,"A glas is an object too."
+                        ,"Using a glas to dring is a function."
+                        ,"Using a spoon to eat is a function."
+                        ,"The spoon is an object to eat."
+                        ]
+
+metrics_sentences' :: [Text]
+metrics_sentences' = splitBy (Sentences 0) metrics_text
 
 -- | Sentences 
 metrics_sentences :: [Text]
-metrics_sentences = [ "A table is an object."
-                    , "A glas is an object."
-                    , "The glas is on the table."
-                    , "The spoon is an object."
-                    , "The spoon is on the table."
-                    ]
+metrics_sentences = ["A table is an object."
+                    ,"A glas is an object too."
+                    ,"The glas and the spoon are on the table."
+                    ,"The spoon is an object to eat."
+                    ,"The spoon is on the table and the plate and the glas."]
 
 
-metrics_sentences_Test = splitBy (Sentences 0) metrics_text == metrics_sentences
+metrics_sentences_Test = metrics_sentences == metrics_sentences'
 
 -- | Terms reordered to visually check occurrences
-metrics_terms :: [[[Text]]]
-metrics_terms = [[["table"],["object"]                  ]
-                ,[          ["object"],["glas"]         ]
-                ,[["table"],           ["glas"]         ]
-                ,[          ["object"],        ["spoon"]]
-                ,[["table"],                   ["spoon"]]
-                ]
---metrics_terms_Test = (mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text) == metrics_terms
+metrics_terms :: [[Text]]
+metrics_terms = undefined
+
+metrics_terms' :: IO [[Terms]]
+metrics_terms' = mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text
+
+--metrics_terms_Test =  metrics_terms == ((map _terms_label) <$> metrics_terms')
 
 -- | Occurrences
 {-
@@ -78,5 +85,3 @@ metrics_cooc' = (mapM (terms Multi EN) $ splitBy (Sentences 0) "The table object
 
 
-
-
diff --git a/src/Gargantext/Text/Metrics/Occurrences.hs b/src/Gargantext/Text/Metrics/Count.hs
similarity index 86%
rename from src/Gargantext/Text/Metrics/Occurrences.hs
rename to src/Gargantext/Text/Metrics/Count.hs
index dbfc692f..eccc04c9 100644
--- a/src/Gargantext/Text/Metrics/Occurrences.hs
+++ b/src/Gargantext/Text/Metrics/Count.hs
@@ -1,5 +1,5 @@
 {-|
-Module      : Gargantext.Text.Metrics.Occurrences
+Module      : Gargantext.Text.Metrics.Count
 Description : 
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
@@ -25,7 +25,7 @@ Source : https://en.wikipedia.org/wiki/Type%E2%80%93token_distinction#Occurrence
 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 
-module Gargantext.Text.Metrics.Occurrences
+module Gargantext.Text.Metrics.Count
   where
 
 
@@ -71,7 +71,6 @@ type Grouped = Stems
 --Î»:   cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon blues lagoon", "red lagoon red lagoon", "red lagoon"]
 --fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
 ---- 
-
            -}
 
 type Occs = Int
@@ -81,10 +80,16 @@ removeApax :: Map (Label, Label) Int -> Map (Label, Label) Int
 removeApax = DMS.filter (> 1)
 
 cooc :: [[Terms]] -> Map (Label, Label) Int
-cooc tss =
-  mapKeys (delta $ labelPolicy terms_occs) $ coocOn _terms_stem tss
+cooc tss = coocOnWithLabel _terms_stem (labelPolicy terms_occs) tss
   where
     terms_occs = occurrencesOn _terms_stem (List.concat tss)
+
+
+coocOnWithLabel :: (Ord label, Ord b) => (a -> b) -> (b -> label)
+                                      -> [[a]] -> Map (label, label) Coocs
+coocOnWithLabel on policy tss =
+  mapKeys (delta policy) $ coocOn on tss
+  where
     delta f = f *** f
 
 
@@ -95,16 +100,16 @@ labelPolicy m g =  case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList
 
 coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Coocs
 coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
-
-coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Coocs
-coocOn' f ts = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
   where
-      ts' = List.nub $ map f ts
-      xs = [ ((x, y), 1)
-           | x <- ts'
-           , y <- ts'
-           , x < y
-           ]
+    coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Coocs
+    coocOn' f ts = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
+      where
+          ts' = List.nub $ map f ts
+          xs = [ ((x, y), 1)
+               | x <- ts'
+               , y <- ts'
+               , x < y
+               ]
 
 
 -- | Compute the grouped occurrences (occ)
@@ -115,6 +120,7 @@ occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
 occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty
 
 -- TODO add groups and filter stops
+
 sumOcc :: Ord a => [Occ a] -> Occ a
 sumOcc xs = foldl' (unionWith (+)) empty xs
 
diff --git a/src/Gargantext/Viz/Graph/Distances/Matrice.hs b/src/Gargantext/Viz/Graph/Distances/Matrice.hs
index 1aa4db24..f010d5dd 100644
--- a/src/Gargantext/Viz/Graph/Distances/Matrice.hs
+++ b/src/Gargantext/Viz/Graph/Distances/Matrice.hs
@@ -46,7 +46,7 @@ import Data.Maybe (Maybe(Just))
 import qualified Gargantext.Prelude as P
 import qualified Data.Array.Accelerate.Array.Representation     as Repr
 
-import Gargantext.Text.Metrics.Occurrences
+import Gargantext.Text.Metrics.Count
 
 
 -----------------------------------------------------------------------
-- 
2.47.0


From 42ab55b919c26ac11d3a7e8c6ca724ac189f8bdb Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alexandre=20Delano=C3=AB?= <devel+git@delanoe.org>
Date: Sat, 2 Jun 2018 22:58:08 +0200
Subject: [PATCH 14/16] [FIX] Corenlp lems -> using stem.

---
 src/Gargantext/Text/Terms/Multi.hs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/Gargantext/Text/Terms/Multi.hs b/src/Gargantext/Text/Terms/Multi.hs
index f92dd61a..ee676498 100644
--- a/src/Gargantext/Text/Terms/Multi.hs
+++ b/src/Gargantext/Text/Terms/Multi.hs
@@ -18,23 +18,27 @@ module Gargantext.Text.Terms.Multi (multiterms)
 
 import Data.Text hiding (map, group, filter, concat)
 import Data.List (concat)
+import qualified Data.Set as S
 
 import Gargantext.Prelude
 import Gargantext.Core (Lang(..))
 import Gargantext.Core.Types
 
 import Gargantext.Text.Terms.Multi.PosTagging
+import Gargantext.Text.Terms.Mono.Stem (stem)
 import qualified Gargantext.Text.Terms.Multi.Lang.En as En
 import qualified Gargantext.Text.Terms.Multi.Lang.Fr as Fr
 
 multiterms :: Lang -> Text -> IO [Terms]
 multiterms lang txt = concat
-                   <$> map (map tokenTag2terms)
+                   <$> map (map (tokenTag2terms lang))
                    <$> map (filter (\t -> _my_token_pos t == Just NP)) 
                    <$> tokenTags lang txt
 
-tokenTag2terms :: TokenTag -> Terms
-tokenTag2terms (TokenTag w t _ _) =  Terms w t
+tokenTag2terms :: Lang -> TokenTag -> Terms
+tokenTag2terms lang (TokenTag w t _ _) =  Terms w t'
+  where
+    t' = S.fromList $ map (stem lang) $ S.toList t
 
 tokenTags :: Lang -> Text -> IO [[TokenTag]]
 tokenTags lang s = map (group lang) <$> tokenTags' lang s
-- 
2.47.0


From 58c1c24985421cd1d4e5bb422eaa196aa014bebd Mon Sep 17 00:00:00 2001
From: Nicolas Pouillard <nicolas.pouillard@gmail.com>
Date: Mon, 4 Jun 2018 13:43:52 +0200
Subject: [PATCH 15/16] Basic polymorphic version of FrequentItemSet

---
 .../Text/Metrics/FrequentItemSet.hs           | 61 +++++++++++++------
 1 file changed, 42 insertions(+), 19 deletions(-)

diff --git a/src/Gargantext/Text/Metrics/FrequentItemSet.hs b/src/Gargantext/Text/Metrics/FrequentItemSet.hs
index f4ab2b84..105250c1 100644
--- a/src/Gargantext/Text/Metrics/FrequentItemSet.hs
+++ b/src/Gargantext/Text/Metrics/FrequentItemSet.hs
@@ -14,42 +14,48 @@ Domain Specific Language to manage Frequent Item Set (FIS)
 {-# LANGUAGE NoImplicitPrelude #-}
 
 module Gargantext.Text.Metrics.FrequentItemSet
-  ( Fis, Size
+  ( Fis, Size(..)
   , occ_hlcm, cooc_hlcm
   , all, between
   , module HLCM
   )
   where
 
-import Data.List (tail, filter)
-import Data.Either
+import Prelude (Functor(..)) -- TODO
+import qualified Data.Map.Strict as Map
+import Data.Map.Strict (Map)
+import qualified Data.Set as Set
+import Data.Set (Set)
+import qualified Data.Vector as V
+import Data.Vector (Vector)
+
+import Data.List (filter, concat)
+import Data.Maybe (catMaybes)
 
 import HLCM
 
 import Gargantext.Prelude
 
-type Size = Either Int (Int, Int)
-
---data Size = Point | Segment
+data Size = Point Int | Segment Int Int
 
 ------------------------------------------------------------------------
 -- | Occurrence is Frequent Item Set of size 1
 occ_hlcm  :: Frequency -> [[Item]] -> [Fis]
-occ_hlcm f is  = fisWithSize (Left 1) f is
+occ_hlcm = fisWithSize (Point 1)
 
 -- | Cooccurrence is Frequent Item Set of size 2
 cooc_hlcm :: Frequency -> [[Item]] -> [Fis]
-cooc_hlcm f is = fisWithSize (Left 2) f is
+cooc_hlcm = fisWithSize (Point 2)
 
 all :: Frequency -> [[Item]] -> [Fis]
-all f is  = fisWith Nothing f is
+all = fisWith Nothing
 
 ------------------------------------------------------------------------
 between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
-between (x,y) f is = fisWithSize (Right (x,y)) f is
+between (x,y) = fisWithSize (Segment x y)
 
 --maximum :: Int -> Frequency -> [[Item]] -> [Fis]
---maximum m f is = between (0,m) f is
+--maximum m = between (0,m)
 
 
 ------------------------------------------------------------------------
@@ -62,31 +68,48 @@ data Fis' a = Fis' { _fisCount   :: Int
                    , _fisItemSet :: [a]
                    } deriving (Show)
 
+instance Functor Fis' where
+  fmap f (Fis' c is) = Fis' c (fmap f is)
+
 -- | Sugar from items to FIS
 items2fis :: [Item] -> Maybe Fis
-items2fis is = case head is of
-                 Nothing -> Nothing
-                 Just h  -> Just (Fis' h (tail is))
+items2fis []     = Nothing
+items2fis (i:is) = Just $ Fis' i is
 
 ------------------------------------------------------------------------
 ------------------------------------------------------------------------
 
 fisWithSize :: Size -> Frequency -> [[Item]] -> [Fis]
 fisWithSize n f is = case n of
-  Left   n'   -> fisWith (Just (\x -> length x == (n'+1)    )) f is
-  Right (a,b) -> fisWith (Just (\x -> cond1 a x && cond2 b x)) f is
+  Point  n'   -> fisWith (Just (\x -> length x == (n'+1) )) f is
+  Segment a b -> fisWith (Just (\x -> cond a (length x) b)) f is
                     where
-                      cond1 a' x = length x >= a'
-                      cond2 b' x = length x <= b'
+                      cond a' x b' = a' <= x && x <= b'
 
 
 fisWith :: Maybe ([Item] -> Bool) -> Frequency -> [[Item]] -> [Fis]
-fisWith s f is = unMaybe $ map items2fis $ filter' $ runLCMmatrix is f
+fisWith s f is = catMaybes $ map items2fis $ filter' $ runLCMmatrix is f
   where
     filter' = case s of
                 Nothing  -> identity
                 Just fun -> filter fun
 
+-- Here the sole purpose to take the keys as a Set is tell we do not want
+-- duplicates.
+fisWithSizePoly :: Ord a => Size -> Frequency -> Set a -> [[a]] -> [Fis' a]
+fisWithSizePoly n f ks = map (fmap fromItem) . fisWithSize n f . map (map toItem)
+  where
+    ksv = V.fromList $ Set.toList ks
+    ksm = Map.fromList . flip zip [0..] $ V.toList ksv
+    toItem = (ksm Map.!)
+    fromItem = (ksv V.!)
+
+fisWithSizePoly2 :: Ord a => Size -> Frequency -> [[a]] -> [Fis' a]
+fisWithSizePoly2 n f is = fisWithSizePoly n f ks is
+  where
+    ks = Set.fromList $ concat is
+
+
 ------------------------------------------------------------------------
 ------------------------------------------------------------------------
 
-- 
2.47.0


From 945d8e2e6e82e8b8dfaad8cc55fa237b39c356d3 Mon Sep 17 00:00:00 2001
From: Nicolas Pouillard <nicolas.pouillard@gmail.com>
Date: Mon, 4 Jun 2018 13:45:02 +0200
Subject: [PATCH 16/16] Minor build/packaging tweaks

---
 package.yaml                   | 2 +-
 src/Gargantext/Text/Metrics.hs | 1 +
 stack.yaml                     | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/package.yaml b/package.yaml
index 2de9e6a0..4b5a26cd 100644
--- a/package.yaml
+++ b/package.yaml
@@ -35,7 +35,6 @@ library:
   dependencies:
   - QuickCheck
   - accelerate
-  - accelerate-io
   - aeson
   - aeson-lens
   - aeson-pretty
@@ -46,6 +45,7 @@ library:
   - bytestring
   - case-insensitive
   - cassava
+  - clustering-louvain
   - conduit
   - conduit-extra
   - containers
diff --git a/src/Gargantext/Text/Metrics.hs b/src/Gargantext/Text/Metrics.hs
index 925dea93..08a2db87 100644
--- a/src/Gargantext/Text/Metrics.hs
+++ b/src/Gargantext/Text/Metrics.hs
@@ -11,6 +11,7 @@ Mainly reexport functions in @Data.Text.Metrics@
 -}
 
 {-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
 
 module Gargantext.Text.Metrics where
 
diff --git a/stack.yaml b/stack.yaml
index 23363269..d0c227db 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -14,7 +14,7 @@ extra-deps:
 - git: https://github.com/delanoe/servant-static-th.git
   commit: fff77e79fe94d563ab5cae2609b78c17b5c1f434
 - accelerate-1.2.0.0
-- accelerate-io-1.2.0.0
+- hashtables-1.2.3.0 # needed by accelerate-1.2.0.0
 - aeson-1.2.4.0
 - aeson-lens-0.5.0.0
 - duckling-0.1.3.0
-- 
2.47.0