2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE Arrows #-}
15 module Gargantext.Database.Action.Flow.Pairing
20 import Control.Lens (_Just, (^.))
21 import Data.Map (Map, fromList, fromListWith)
22 import Data.Maybe (catMaybes, fromMaybe)
23 import Data.Text (Text, toLower)
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 import Gargantext.Core.Types (TableResult(..), Term)
26 import Gargantext.Database
27 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery)
30 import Gargantext.Database.Query.Prelude (leftJoin2, returnA, queryNodeNodeTable)
31 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
32 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
33 import Gargantext.Database.Admin.Config (nodeTypeId)
34 import Gargantext.Database.Schema.Node
35 import Gargantext.Prelude hiding (sum)
37 import qualified Data.List as List
38 import qualified Data.Map as Map
39 import qualified Data.Text as DT
40 import qualified Data.Set as Set
45 -- All NodeAnnuaire paired with a Corpus of NodeId nId:
46 -- isPairedWith NodeAnnuaire corpusId
47 isPairedWith :: NodeType -> NodeId -> Cmd err [NodeId]
48 isPairedWith nt nId = runOpaQuery (selectQuery nt nId)
50 selectQuery :: NodeType -> NodeId -> Query (Column PGInt4)
51 selectQuery nt' nId' = proc () -> do
52 (node, node_node) <- queryJoin -< ()
53 restrict -< (node^.node_typename) .== (pgInt4 $ nodeTypeId nt')
54 restrict -< (node_node^.nn_node1_id) .== (toNullable $ pgNodeId nId')
55 returnA -< node^.node_id
57 queryJoin :: Query (NodeRead, NodeNodeReadNull)
58 queryJoin = leftJoin2 queryNodeTable queryNodeNodeTable cond
60 cond (node, node_node) = node^.node_id .== node_node^. nn_node2_id
65 -----------------------------------------------------------------------
66 pairing :: AnnuaireId -> CorpusId -> ListId -> Cmd err Int
68 dataPaired <- dataPairing a (c,l,Authors) lastName toLower
69 insertDB $ prepareInsert dataPaired
72 dataPairing :: AnnuaireId
73 -> (CorpusId, ListId, NgramsType)
74 -> (ContactName -> Projected)
75 -> (DocAuthor -> Projected)
76 -> Cmd err (Map ContactId (Set DocId))
77 dataPairing aId (cId, lId, ngt) fc fa = do
78 mc <- getNgramsContactId aId
79 md <- getNgramsDocId cId lId ngt
82 from = projectionFrom (Set.fromList $ Map.keys mc) fc
83 to = projectionTo (Set.fromList $ Map.keys md) fa
85 pure $ fusion mc $ align from to md
89 prepareInsert :: Map ContactId (Set DocId) -> [NodeNode]
90 prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
92 $ map (\(contactId, setDocIds)
94 -> (contactId, setDocId)
95 ) $ Set.toList setDocIds
99 ------------------------------------------------------------------------
100 type ContactName = Text
101 type DocAuthor = Text
102 type Projected = Text
104 projectionFrom :: Set ContactName -> (ContactName -> Projected) -> Map ContactName Projected
105 projectionFrom ss f = fromList $ map (\s -> (s, f s)) (Set.toList ss)
107 projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> Map Projected (Set DocAuthor)
108 projectionTo ss f = fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss)
110 ------------------------------------------------------------------------
111 lastName :: Term -> Term
112 lastName texte = DT.toLower
113 $ maybe texte (\x -> if DT.length x > 3 then x else texte)
116 lastName' = lastMay . DT.splitOn " "
119 ------------------------------------------------------------------------
120 align :: Map ContactName Projected
121 -> Map Projected (Set DocAuthor)
122 -> Map DocAuthor (Set DocId)
123 -> Map ContactName (Set DocId)
124 align mc ma md = fromListWith (<>)
125 $ map (\c -> (c, getProjection md $ testProjection c mc ma))
128 getProjection :: Map DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
129 getProjection ma' sa' =
132 else Set.unions $ sets ma' sa'
134 sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
135 lookup s' ma''= fromMaybe Set.empty (Map.lookup s' ma'')
137 testProjection :: ContactName
138 -> Map ContactName Projected
139 -> Map Projected (Set DocAuthor)
141 testProjection cn' mc' ma' = case Map.lookup cn' mc' of
143 Just c -> case Map.lookup c ma' of
147 fusion :: Map ContactName (Set ContactId)
148 -> Map ContactName (Set DocId)
149 -> Map ContactId (Set DocId)
150 fusion mc md = Map.fromListWith (<>)
152 $ [ (,) <$> Just cId <*> Map.lookup cn md
153 | (cn, setContactId) <- Map.toList mc
154 , cId <- Set.toList setContactId
156 ------------------------------------------------------------------------
158 getNgramsContactId :: AnnuaireId
159 -> Cmd err (Map ContactName (Set NodeId))
160 getNgramsContactId aId = do
161 contacts <- getAllContacts aId
162 pure $ fromListWith (<>)
164 $ map (\contact -> (,) <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName)
165 <*> Just ( Set.singleton (contact^.node_id))
169 -- filter Trash / map Authors
170 -- Indexing all ngramsType like Authors
171 getNgramsDocId :: CorpusId
174 -> Cmd err (Map DocAuthor (Set NodeId))
175 getNgramsDocId corpusId listId nt
177 <$> map (\(t,nId) -> (t, Set.singleton (NodeId nId)))
178 <$> selectNgramsDocId corpusId listId nt
181 selectNgramsDocId :: CorpusId
184 -> Cmd err [(Text, Int)]
185 selectNgramsDocId corpusId' listId' ngramsType' =
186 runPGSQuery selectQuery (corpusId', listId', ngramsTypeId ngramsType')
188 selectQuery = [sql| SELECT ng.terms,nnng.node2_id from ngrams ng
189 JOIN node_node_ngrams nnng ON nnng.ngrams_id = ng.id
190 JOIN nodes_nodes nn ON nn.node2_id = nnng.node2_id
192 WHERE nn.node1_id = ?
193 AND nnng.node1_id = ?
194 AND nnng.ngrams_type = ?