2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE QuasiQuotes #-}
13 -- {-# LANGUAGE Arrows #-}
15 module Gargantext.Database.Action.Flow.Pairing
20 import Control.Lens (_Just, (^.))
21 import Data.Map (Map, fromList, fromListWith)
22 import Data.Maybe (catMaybes, fromMaybe)
23 import Data.Text (Text, toLower)
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 import Gargantext.Core.Types (TableResult(..))
26 import Gargantext.Database
27 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
30 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
31 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
32 import Gargantext.Database.Schema.Node
33 import Gargantext.Prelude hiding (sum)
35 import qualified Data.List as List
36 import qualified Data.Map as Map
37 import qualified Data.Text as DT
38 import qualified Data.Set as Set
40 -- TODO mv this type in Types Main
46 pairingPolicy :: (Terms -> Terms)
49 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
52 pairMaps :: Map (NgramsT Ngrams) a
53 -> Map (NgramsT Ngrams) NgramsId
54 -> Map NgramsIndexed (Map NgramsType a)
57 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
58 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
59 , Just nId <- [DM.lookup k m2]
63 -----------------------------------------------------------------------
65 pairing :: AnnuaireId -> CorpusId -> ListId -> Cmd err Int
67 dataPaired <- dataPairing a (c,l,Authors) lastName toLower
68 insertDB $ prepareInsert dataPaired
71 dataPairing :: AnnuaireId
72 -> (CorpusId, ListId, NgramsType)
73 -> (ContactName -> Projected)
74 -> (DocAuthor -> Projected)
75 -> Cmd err (Map ContactId (Set DocId))
76 dataPairing aId (cId, lId, ngt) fc fa = do
77 mc <- getNgramsContactId aId
78 md <- getNgramsDocId cId lId ngt
81 from = projectionFrom (Set.fromList $ Map.keys mc) fc
82 to = projectionTo (Set.fromList $ Map.keys md) fa
84 pure $ fusion mc $ align from to md
88 prepareInsert :: Map ContactId (Set DocId) -> [NodeNode]
89 prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
91 $ map (\(contactId, setDocIds)
93 -> (contactId, setDocId)
94 ) $ Set.toList setDocIds
100 ------------------------------------------------------------------------
101 type ContactName = Text
102 type DocAuthor = Text
103 type Projected = Text
105 projectionFrom :: Set ContactName -> (ContactName -> Projected) -> Map ContactName Projected
106 projectionFrom ss f = fromList $ map (\s -> (s, f s)) (Set.toList ss)
108 projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> Map Projected (Set DocAuthor)
109 projectionTo ss f = fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss)
111 ------------------------------------------------------------------------
112 lastName :: Terms -> Terms
113 lastName texte = DT.toLower
114 $ maybe texte (\x -> if DT.length x > 3 then x else texte)
117 lastName' = lastMay . DT.splitOn " "
120 ------------------------------------------------------------------------
121 align :: Map ContactName Projected
122 -> Map Projected (Set DocAuthor)
123 -> Map DocAuthor (Set DocId)
124 -> Map ContactName (Set DocId)
125 align mc ma md = fromListWith (<>)
126 $ map (\c -> (c, getProjection md $ testProjection c mc ma))
129 getProjection :: Map DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
130 getProjection ma' sa' =
133 else Set.unions $ sets ma' sa'
135 sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
136 lookup s' ma''= fromMaybe Set.empty (Map.lookup s' ma'')
138 testProjection :: ContactName
139 -> Map ContactName Projected
140 -> Map Projected (Set DocAuthor)
142 testProjection cn' mc' ma' = case Map.lookup cn' mc' of
144 Just c -> case Map.lookup c ma' of
148 fusion :: Map ContactName (Set ContactId)
149 -> Map ContactName (Set DocId)
150 -> Map ContactId (Set DocId)
151 fusion mc md = Map.fromListWith (<>)
153 $ [ (,) <$> Just cId <*> Map.lookup cn md
154 | (cn, setContactId) <- Map.toList mc
155 , cId <- Set.toList setContactId
157 ------------------------------------------------------------------------
159 getNgramsContactId :: AnnuaireId
160 -> Cmd err (Map ContactName (Set NodeId))
161 getNgramsContactId aId = do
162 contacts <- getAllContacts aId
163 pure $ fromListWith (<>)
165 $ map (\contact -> (,) <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName)
166 <*> Just ( Set.singleton (contact^.node_id))
170 -- filter Trash / map Authors
171 -- Indexing all ngramsType like Authors
172 getNgramsDocId :: CorpusId
175 -> Cmd err (Map DocAuthor (Set NodeId))
176 getNgramsDocId corpusId listId nt
178 <$> map (\(t,nId) -> (t, Set.singleton (NodeId nId)))
179 <$> selectNgramsDocId corpusId listId nt
182 selectNgramsDocId :: CorpusId
185 -> Cmd err [(Text, Int)]
186 selectNgramsDocId corpusId' listId' ngramsType' =
187 runPGSQuery selectQuery (corpusId', listId', ngramsTypeId ngramsType')
189 selectQuery = [sql| SELECT ng.terms,nnng.node2_id from ngrams ng
190 JOIN node_node_ngrams nnng ON nnng.ngrams_id = ng.id
191 JOIN nodes_nodes nn ON nn.node2_id = nnng.node2_id
193 WHERE nn.node1_id = ?
194 AND nnng.node1_id = ?
195 AND nnng.ngrams_type = ?