2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE QuasiQuotes #-}
13 -- {-# LANGUAGE Arrows #-}
15 module Gargantext.Database.Action.Flow.Pairing
20 import Control.Lens (_Just, (^.))
21 import Data.Map (Map, fromList, fromListWith)
22 import Data.Maybe (catMaybes, fromMaybe)
23 import Data.Text (Text, toLower)
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 import Gargantext.Core.Types (TableResult(..), Term)
26 import Gargantext.Database
27 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
30 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
31 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
32 import Gargantext.Database.Schema.Node
33 import Gargantext.Prelude hiding (sum)
35 import qualified Data.List as List
36 import qualified Data.Map as Map
37 import qualified Data.Text as DT
38 import qualified Data.Set as Set
41 -----------------------------------------------------------------------
42 pairing :: AnnuaireId -> CorpusId -> ListId -> Cmd err Int
44 dataPaired <- dataPairing a (c,l,Authors) lastName toLower
45 insertDB $ prepareInsert dataPaired
48 dataPairing :: AnnuaireId
49 -> (CorpusId, ListId, NgramsType)
50 -> (ContactName -> Projected)
51 -> (DocAuthor -> Projected)
52 -> Cmd err (Map ContactId (Set DocId))
53 dataPairing aId (cId, lId, ngt) fc fa = do
54 mc <- getNgramsContactId aId
55 md <- getNgramsDocId cId lId ngt
58 from = projectionFrom (Set.fromList $ Map.keys mc) fc
59 to = projectionTo (Set.fromList $ Map.keys md) fa
61 pure $ fusion mc $ align from to md
65 prepareInsert :: Map ContactId (Set DocId) -> [NodeNode]
66 prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
68 $ map (\(contactId, setDocIds)
70 -> (contactId, setDocId)
71 ) $ Set.toList setDocIds
75 ------------------------------------------------------------------------
76 type ContactName = Text
80 projectionFrom :: Set ContactName -> (ContactName -> Projected) -> Map ContactName Projected
81 projectionFrom ss f = fromList $ map (\s -> (s, f s)) (Set.toList ss)
83 projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> Map Projected (Set DocAuthor)
84 projectionTo ss f = fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss)
86 ------------------------------------------------------------------------
87 lastName :: Term -> Term
88 lastName texte = DT.toLower
89 $ maybe texte (\x -> if DT.length x > 3 then x else texte)
92 lastName' = lastMay . DT.splitOn " "
95 ------------------------------------------------------------------------
96 align :: Map ContactName Projected
97 -> Map Projected (Set DocAuthor)
98 -> Map DocAuthor (Set DocId)
99 -> Map ContactName (Set DocId)
100 align mc ma md = fromListWith (<>)
101 $ map (\c -> (c, getProjection md $ testProjection c mc ma))
104 getProjection :: Map DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
105 getProjection ma' sa' =
108 else Set.unions $ sets ma' sa'
110 sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
111 lookup s' ma''= fromMaybe Set.empty (Map.lookup s' ma'')
113 testProjection :: ContactName
114 -> Map ContactName Projected
115 -> Map Projected (Set DocAuthor)
117 testProjection cn' mc' ma' = case Map.lookup cn' mc' of
119 Just c -> case Map.lookup c ma' of
123 fusion :: Map ContactName (Set ContactId)
124 -> Map ContactName (Set DocId)
125 -> Map ContactId (Set DocId)
126 fusion mc md = Map.fromListWith (<>)
128 $ [ (,) <$> Just cId <*> Map.lookup cn md
129 | (cn, setContactId) <- Map.toList mc
130 , cId <- Set.toList setContactId
132 ------------------------------------------------------------------------
134 getNgramsContactId :: AnnuaireId
135 -> Cmd err (Map ContactName (Set NodeId))
136 getNgramsContactId aId = do
137 contacts <- getAllContacts aId
138 pure $ fromListWith (<>)
140 $ map (\contact -> (,) <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName)
141 <*> Just ( Set.singleton (contact^.node_id))
145 -- filter Trash / map Authors
146 -- Indexing all ngramsType like Authors
147 getNgramsDocId :: CorpusId
150 -> Cmd err (Map DocAuthor (Set NodeId))
151 getNgramsDocId corpusId listId nt
153 <$> map (\(t,nId) -> (t, Set.singleton (NodeId nId)))
154 <$> selectNgramsDocId corpusId listId nt
157 selectNgramsDocId :: CorpusId
160 -> Cmd err [(Text, Int)]
161 selectNgramsDocId corpusId' listId' ngramsType' =
162 runPGSQuery selectQuery (corpusId', listId', ngramsTypeId ngramsType')
164 selectQuery = [sql| SELECT ng.terms,nnng.node2_id from ngrams ng
165 JOIN node_node_ngrams nnng ON nnng.ngrams_id = ng.id
166 JOIN nodes_nodes nn ON nn.node2_id = nnng.node2_id
168 WHERE nn.node1_id = ?
169 AND nnng.node1_id = ?
170 AND nnng.ngrams_type = ?