Stability : experimental
Portability : POSIX
-# Spécifications for pairing
-
-database:
-
-add NodeType Community (instead of texts, contacts)
-
-nodes_nodes
-corpusId_communitId
-
-get defaultList Id of each (for now)
-
-corpusId_docId
-listId_ngramsId (authors)
-
-listId_docId_ngramsId
-listId_contactId_ngramsId'
-
-if isSame ngramsId ngramsId'
- then
- insert listId_docId_contactId
- else
- nothing
-
-
-}
-{-# LANGUAGE QuasiQuotes #-}
--- {-# LANGUAGE Arrows #-}
+{-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE Arrows #-}
module Gargantext.Database.Action.Flow.Pairing
- (pairing)
+ -- (pairing)
where
import Control.Lens (_Just, (^.))
-import Data.Map (Map, fromList)
-import Data.Maybe (catMaybes)
-import Data.Text (Text, toLower)
-import Database.PostgreSQL.Simple.SqlQQ (sql)
+import Data.HashMap.Strict (HashMap)
+import qualified Data.HashMap.Strict as HM
+import Data.Maybe (catMaybes, fromMaybe)
+import Data.Set (Set)
+import Gargantext.API.Ngrams.Tools
+import Gargantext.API.Ngrams.Types (NgramsTerm(..))
+import Gargantext.API.Prelude (GargNoServer)
+import Gargantext.Core
import Gargantext.Core.Types (TableResult(..))
-import Gargantext.Database.Action.Flow.Utils
-import Gargantext.Database.Admin.Types.Node (AnnuaireId, CorpusId, ListId{-, DocId, ContactId-})
-import Gargantext.Database.Prelude (Cmd, runPGSQuery)
-import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
+import Gargantext.Core.Types.Main
+import Gargantext.Database
+import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
+import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
+import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
+import Gargantext.Database.Query.Prelude (leftJoin2, returnA, queryNodeNodeTable)
+import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
+import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
+import Gargantext.Database.Query.Table.Node (defaultList)
+import Gargantext.Database.Query.Table.NodeNode (insertNodeNode)
import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
+import Gargantext.Database.Schema.Node
import Gargantext.Prelude hiding (sum)
-import Safe (lastMay)
-import qualified Data.Map as DM
-import qualified Data.Text as DT
-
--- TODO mv this type in Types Main
-type Terms = Text
-
-{-
-pairing'' :: (CorpusId, CorpusId) -> (DocId -> DocId)
-pairing'' = undefined
-
-pairing' :: (CorpusId, AnnuaireId) -> (DocId -> ContactId)
-pairing' = undefined
--}
-
--- | TODO : add paring policy as parameter
-pairing :: CorpusId -- (CorpusId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
- -> AnnuaireId -- (AnnuaireId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
- -> ListId
- -> Cmd err Int
-pairing cId aId lId = do
- contacts' <- getAllContacts aId
- let contactsMap = pairingPolicyToMap toLower
- $ toMaps extractNgramsT (tr_docs contacts')
-
- ngramsMap' <- getNgramsTindexed cId Authors
- let ngramsMap = pairingPolicyToMap lastName ngramsMap'
-
- let indexedNgrams = pairMaps contactsMap ngramsMap
-
- insertDocNgrams lId indexedNgrams
-
-lastName :: Terms -> Terms
-lastName texte = DT.toLower
- $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
+import Opaleye
+import qualified Data.HashMap.Strict as HashMap
+import qualified Data.List as List
+import qualified Data.Set as Set
+import qualified Data.Text as DT
+
+-- | isPairedWith
+-- All NodeAnnuaire paired with a Corpus of NodeId nId:
+-- isPairedWith NodeAnnuaire corpusId
+isPairedWith :: NodeId -> NodeType -> Cmd err [NodeId]
+isPairedWith nId nt = runOpaQuery (selectQuery nt nId)
where
- lastName' = lastMay . DT.splitOn " "
+ selectQuery :: NodeType -> NodeId -> Query (Column PGInt4)
+ selectQuery nt' nId' = proc () -> do
+ (node, node_node) <- queryJoin -< ()
+ restrict -< (node^.node_typename) .== (pgInt4 $ toDBid nt')
+ restrict -< (node_node^.nn_node1_id) .== (toNullable $ pgNodeId nId')
+ returnA -< node^.node_id
+
+ queryJoin :: Query (NodeRead, NodeNodeReadNull)
+ queryJoin = leftJoin2 queryNodeTable queryNodeNodeTable cond
+ where
+ cond (node, node_node) = node^.node_id .== node_node^. nn_node2_id
--- TODO: this method is dangerous (maybe equalities of the result are
--- not taken into account emergency demo plan...)
-pairingPolicyToMap :: (Terms -> Terms)
- -> Map (NgramsT Ngrams) a
- -> Map (NgramsT Ngrams) a
-pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
-
-pairingPolicy :: (Terms -> Terms)
- -> NgramsT Ngrams
- -> NgramsT Ngrams
-pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
-
--- | TODO : use Occurrences in place of Int
-extractNgramsT :: HyperdataContact
- -> Map (NgramsT Ngrams) Int
-extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
+-----------------------------------------------------------------------
+pairing :: AnnuaireId -> CorpusId -> Maybe ListId -> GargNoServer Int
+pairing a c l' = do
+ l <- case l' of
+ Nothing -> defaultList c
+ Just l'' -> pure l''
+ dataPaired <- dataPairing a (c,l,Authors) takeName takeName
+ r <- insertDB $ prepareInsert dataPaired
+ _ <- insertNodeNode [ NodeNode c a Nothing Nothing]
+ pure r
+
+
+dataPairing :: AnnuaireId
+ -> (CorpusId, ListId, NgramsType)
+ -> (ContactName -> Projected)
+ -> (DocAuthor -> Projected)
+ -> GargNoServer (HashMap ContactId (Set DocId))
+dataPairing aId (cId, lId, ngt) fc fa = do
+ mc <- getNgramsContactId aId
+ md <- getNgramsDocId cId lId ngt
+
+ printDebug "ngramsContactId" mc
+ printDebug "ngramsDocId" md
+ let
+ from = projectionFrom (Set.fromList $ HM.keys mc) fc
+ to = projectionTo (Set.fromList $ HM.keys md) fa
+
+ pure $ fusion mc $ align from to md
+
+
+
+prepareInsert :: HashMap ContactId (Set DocId) -> [NodeNode]
+prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
+ $ List.concat
+ $ map (\(contactId, setDocIds)
+ -> map (\setDocId
+ -> (contactId, setDocId)
+ ) $ Set.toList setDocIds
+ )
+ $ HM.toList m
+
+------------------------------------------------------------------------
+type ContactName = NgramsTerm
+type DocAuthor = NgramsTerm
+type Projected = NgramsTerm
+
+projectionFrom :: Set ContactName -> (ContactName -> Projected) -> HashMap ContactName Projected
+projectionFrom ss f = HM.fromList $ map (\s -> (s, f s)) (Set.toList ss) -- use HS.toMap
+
+projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> HashMap Projected (Set DocAuthor)
+projectionTo ss f = HM.fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss) -- use HS.toMap
+------------------------------------------------------------------------
+takeName :: NgramsTerm -> NgramsTerm
+takeName (NgramsTerm texte) = NgramsTerm $ DT.toLower texte'
where
- authors = map text2ngrams
- $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
-
+ texte' = maybe texte (\x -> if DT.length x > 3 then x else texte)
+ (lastName' texte)
+ lastName' = lastMay . DT.splitOn " "
-pairMaps :: Map (NgramsT Ngrams) a
- -> Map (NgramsT Ngrams) NgramsId
- -> Map NgramsIndexed (Map NgramsType a)
-pairMaps m1 m2 =
- DM.fromList
- [ (NgramsIndexed ng nId, DM.singleton nt n2i)
- | (k@(NgramsT nt ng),n2i) <- DM.toList m1
- , Just nId <- [DM.lookup k m2]
- ]
------------------------------------------------------------------------
-getNgramsTindexed :: CorpusId
- -> NgramsType
- -> Cmd err (Map (NgramsT Ngrams) NgramsId)
-getNgramsTindexed corpusId ngramsType' = fromList
- <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
- <$> selectNgramsTindexed corpusId ngramsType'
+------------------------------------------------------------------------
+align :: HashMap ContactName Projected
+ -> HashMap Projected (Set DocAuthor)
+ -> HashMap DocAuthor (Set DocId)
+ -> HashMap ContactName (Set DocId)
+align mc ma md = HM.fromListWith (<>)
+ $ map (\c -> (c, getProjection md $ testProjection c mc ma))
+ $ HM.keys mc
where
- selectNgramsTindexed :: CorpusId
- -> NgramsType
- -> Cmd err [(NgramsId, Terms, Int)]
- selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
- where
- selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
- JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
- -- JOIN node_node_ngrams2 occ ON occ.ngrams_id = n.id
- JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
-
- WHERE nn.node1_id = ?
- AND occ.ngrams_type = ?
- AND occ.node2_id = nn.node2_id
- GROUP BY n.id;
- |]
-
-{- | TODO more typed SQL queries
-selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
-selectNgramsTindexed corpusId ngramsType = proc () -> do
- nodeNode <- queryNodeNodeTable -< ()
- nodeNgrams <- queryNodesNgramsTable -< ()
- ngrams <- queryNgramsTable -< ()
-
- restrict -< node1_id nodeNode .== pgInt4 corpusId
- restrict -< node2_id nodeNode .== node_id nodeNgrams
- restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
-
- result <- aggregate groupBy (ngrams_id ngrams)
- returnA -< result
---}
+ getProjection :: HashMap DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
+ getProjection ma' sa' =
+ if Set.null sa'
+ then Set.empty
+ else Set.unions $ sets ma' sa'
+ where
+ sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
+ lookup s' ma''= fromMaybe Set.empty (HM.lookup s' ma'')
+
+ testProjection :: ContactName
+ -> HashMap ContactName Projected
+ -> HashMap Projected (Set DocAuthor)
+ -> Set DocAuthor
+ testProjection cn' mc' ma' = case HM.lookup cn' mc' of
+ Nothing -> Set.empty
+ Just c -> case HM.lookup c ma' of
+ Nothing -> Set.empty
+ Just a -> a
+
+fusion :: HashMap ContactName (Set ContactId)
+ -> HashMap ContactName (Set DocId)
+ -> HashMap ContactId (Set DocId)
+fusion mc md = HM.fromListWith (<>)
+ $ catMaybes
+ $ [ (,) <$> Just cId <*> HM.lookup cn md
+ | (cn, setContactId) <- HM.toList mc
+ , cId <- Set.toList setContactId
+ ]
+------------------------------------------------------------------------
+
+getNgramsContactId :: AnnuaireId
+ -> Cmd err (HashMap ContactName (Set NodeId))
+getNgramsContactId aId = do
+ contacts <- getAllContacts aId
+ pure $ HM.fromListWith (<>)
+ $ catMaybes
+ $ map (\contact -> (,) <$> (NgramsTerm <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName))
+ <*> Just ( Set.singleton (contact^.node_id))
+ ) (tr_docs contacts)
+
+
+getNgramsDocId :: CorpusId
+ -> ListId
+ -> NgramsType
+ -> GargNoServer (HashMap DocAuthor (Set NodeId))
+getNgramsDocId cId lId nt = do
+ repo <- getRepo
+ lIds <- selectNodesWithUsername NodeList userMaster
+ let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
+
+ groupNodesByNgrams ngs
+ <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)