{-|
Module      : Gargantext.Database.TextSearch
Description : 
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Here is a longer description of this module, containing some
commentary with @some markup@.
-}

{-# LANGUAGE NoImplicitPrelude #-}
--{-# LANGUAGE OverloadedStrings #-}

{-# LANGUAGE QuasiQuotes #-}

module Gargantext.Database.Cooc where

import Control.Monad ((>>=))
import Database.PostgreSQL.Simple
import Database.PostgreSQL.Simple.SqlQQ

import Gargantext.Prelude
import Gargantext (connectGargandb)

type CorpusId    = Int
type MainListId  = Int
type GroupListId = Int

coocTest :: IO [(Int, Int, Int)]
coocTest = connectGargandb "gargantext.ini"
  >>= \conn -> dBcooc conn 421968 446602 446599 

dBcooc :: Connection -> CorpusId -> MainListId -> GroupListId -> IO [(Int, Int, Int)]
dBcooc conn corpus mainList groupList = query conn [sql|
  set work_mem='1GB';

  --EXPLAIN ANALYZE
      WITH COOC as (
      SELECT
      COALESCE(grA.ngram1_id, wlA.ngram_id) as ngA,
      COALESCE(grB.ngram1_id, wlB.ngram_id) as ngB,
      COUNT(*) AS score
      FROM
      nodes AS n
      --      /     --     X   Y
      -- SQL graph for getting the cooccurrences

          -- STEP 1: X axis of the matrix
          INNER JOIN nodes_ngrams
                  AS ngA  ON ngA.node_id  = n.id
          -- \--> get the occurrences node/ngram of the corpus

              INNER JOIN nodes_ngrams
                      AS wlA  ON ngA.ngram_id = wlA.ngram_id
                             AND wlA.node_id  = ?
              -- \--> filter with white/main list (typename 7)

          LEFT JOIN  nodes_ngrams_ngrams
                 AS grA  ON wlA.ngram_id = grA.ngram1_id
                        AND grA.node_id  = ?
          -- \--> adding (joining) ngrams that are grouped (typename 6)
          LEFT JOIN  nodes_ngrams
                 AS wlAA ON grA.ngram2_id = wlAA.ngram_id
                        AND wlAA.node_id  = wlA.node_id
          -- \--> adding (joining) ngrams that are not grouped
          --LEFT JOIN  ngrams        AS wlAA ON grA.ngram2_id = wlAA.id
          -- \--> for joining all synonyms even if they are not in the main list (white list)


          -- STEP 2: Y axi of the matrix
          INNER JOIN nodes_ngrams
                  AS ngB  ON ngB.node_id  = n.id
          -- \--> get the occurrences node/ngram of the corpus

              INNER JOIN nodes_ngrams
                      AS wlB  ON ngB.ngram_id = wlB.ngram_id
                             AND wlB.node_id  = ?
              -- \--> filter with white/main list

          LEFT JOIN  nodes_ngrams_ngrams
                 AS grB  ON wlB.ngram_id = grB.ngram1_id
                        AND grB.node_id  = ?
          -- \--> adding (joining) ngrams that are grouped
          LEFT JOIN  nodes_ngrams
                 AS wlBB ON grB.ngram2_id = wlBB.ngram_id
                        AND wlBB.node_id   = wlB.node_id
          -- \--> adding (joining) ngrams that are not grouped

          -- LEFT JOIN  ngrams        AS wlBB ON grB.ngram2_id = wlBB.id
          -- \--> for joining all synonyms even if they are not in the main list (white list)

          WHERE
              n.typename  = 4
          AND n.parent_id = ?
          GROUP BY 1,2
          --    ==
          -- GROUP BY ngA, ngB
          )

      SELECT ngA, ngB, score
          FROM COOC    --> from the query above

   WHERE score >= 3
  AND 
  ngA <= ngB
  |] (mainList, groupList, mainList, groupList, corpus)