]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Flow/Pairing.hs
[Crypto] files now in G.Core.Crypto
[gargantext.git] / src / Gargantext / Database / Action / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE Arrows #-}
14
15 module Gargantext.Database.Action.Flow.Pairing
16 -- (pairing)
17 where
18
19 import Data.Set (Set)
20 import Control.Lens (_Just, (^.))
21 import Data.Map (Map, fromList, fromListWith)
22 import Data.Maybe (catMaybes, fromMaybe)
23 import Data.Text (Text, toLower)
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 import Gargantext.Core.Types (TableResult(..), Term)
26 import Gargantext.Database
27 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery)
30 import Gargantext.Database.Query.Prelude (leftJoin2, returnA, queryNodeNodeTable)
31 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
32 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
33 import Gargantext.Database.Admin.Config (nodeTypeId)
34 import Gargantext.Database.Schema.Node
35 import Gargantext.Prelude hiding (sum)
36 import Safe (lastMay)
37 import qualified Data.List as List
38 import qualified Data.Map as Map
39 import qualified Data.Text as DT
40 import qualified Data.Set as Set
41 import Opaleye
42
43
44 -- | isPairedWith
45 -- All NodeAnnuaire paired with a Corpus of NodeId nId:
46 -- isPairedWith NodeAnnuaire corpusId
47 isPairedWith :: NodeType -> NodeId -> Cmd err [NodeId]
48 isPairedWith nt nId = runOpaQuery (selectQuery nt nId)
49 where
50 selectQuery :: NodeType -> NodeId -> Query (Column PGInt4)
51 selectQuery nt' nId' = proc () -> do
52 (node, node_node) <- queryJoin -< ()
53 restrict -< (node^.node_typename) .== (pgInt4 $ nodeTypeId nt')
54 restrict -< (node_node^.nn_node1_id) .== (toNullable $ pgNodeId nId')
55 returnA -< node^.node_id
56
57 queryJoin :: Query (NodeRead, NodeNodeReadNull)
58 queryJoin = leftJoin2 queryNodeTable queryNodeNodeTable cond
59 where
60 cond (node, node_node) = node^.node_id .== node_node^. nn_node2_id
61
62
63
64
65 -----------------------------------------------------------------------
66 pairing :: AnnuaireId -> CorpusId -> ListId -> Cmd err Int
67 pairing a c l = do
68 dataPaired <- dataPairing a (c,l,Authors) lastName toLower
69 insertDB $ prepareInsert dataPaired
70
71
72 dataPairing :: AnnuaireId
73 -> (CorpusId, ListId, NgramsType)
74 -> (ContactName -> Projected)
75 -> (DocAuthor -> Projected)
76 -> Cmd err (Map ContactId (Set DocId))
77 dataPairing aId (cId, lId, ngt) fc fa = do
78 mc <- getNgramsContactId aId
79 md <- getNgramsDocId cId lId ngt
80
81 let
82 from = projectionFrom (Set.fromList $ Map.keys mc) fc
83 to = projectionTo (Set.fromList $ Map.keys md) fa
84
85 pure $ fusion mc $ align from to md
86
87
88
89 prepareInsert :: Map ContactId (Set DocId) -> [NodeNode]
90 prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
91 $ List.concat
92 $ map (\(contactId, setDocIds)
93 -> map (\setDocId
94 -> (contactId, setDocId)
95 ) $ Set.toList setDocIds
96 )
97 $ Map.toList m
98
99 ------------------------------------------------------------------------
100 type ContactName = Text
101 type DocAuthor = Text
102 type Projected = Text
103
104 projectionFrom :: Set ContactName -> (ContactName -> Projected) -> Map ContactName Projected
105 projectionFrom ss f = fromList $ map (\s -> (s, f s)) (Set.toList ss)
106
107 projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> Map Projected (Set DocAuthor)
108 projectionTo ss f = fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss)
109
110 ------------------------------------------------------------------------
111 lastName :: Term -> Term
112 lastName texte = DT.toLower
113 $ maybe texte (\x -> if DT.length x > 3 then x else texte)
114 (lastName' texte)
115 where
116 lastName' = lastMay . DT.splitOn " "
117
118
119 ------------------------------------------------------------------------
120 align :: Map ContactName Projected
121 -> Map Projected (Set DocAuthor)
122 -> Map DocAuthor (Set DocId)
123 -> Map ContactName (Set DocId)
124 align mc ma md = fromListWith (<>)
125 $ map (\c -> (c, getProjection md $ testProjection c mc ma))
126 $ Map.keys mc
127 where
128 getProjection :: Map DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
129 getProjection ma' sa' =
130 if Set.null sa'
131 then Set.empty
132 else Set.unions $ sets ma' sa'
133 where
134 sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
135 lookup s' ma''= fromMaybe Set.empty (Map.lookup s' ma'')
136
137 testProjection :: ContactName
138 -> Map ContactName Projected
139 -> Map Projected (Set DocAuthor)
140 -> Set DocAuthor
141 testProjection cn' mc' ma' = case Map.lookup cn' mc' of
142 Nothing -> Set.empty
143 Just c -> case Map.lookup c ma' of
144 Nothing -> Set.empty
145 Just a -> a
146
147 fusion :: Map ContactName (Set ContactId)
148 -> Map ContactName (Set DocId)
149 -> Map ContactId (Set DocId)
150 fusion mc md = Map.fromListWith (<>)
151 $ catMaybes
152 $ [ (,) <$> Just cId <*> Map.lookup cn md
153 | (cn, setContactId) <- Map.toList mc
154 , cId <- Set.toList setContactId
155 ]
156 ------------------------------------------------------------------------
157
158 getNgramsContactId :: AnnuaireId
159 -> Cmd err (Map ContactName (Set NodeId))
160 getNgramsContactId aId = do
161 contacts <- getAllContacts aId
162 pure $ fromListWith (<>)
163 $ catMaybes
164 $ map (\contact -> (,) <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName)
165 <*> Just ( Set.singleton (contact^.node_id))
166 ) (tr_docs contacts)
167
168 -- | TODO
169 -- filter Trash / map Authors
170 -- Indexing all ngramsType like Authors
171 getNgramsDocId :: CorpusId
172 -> ListId
173 -> NgramsType
174 -> Cmd err (Map DocAuthor (Set NodeId))
175 getNgramsDocId corpusId listId nt
176 = fromListWith (<>)
177 <$> map (\(t,nId) -> (t, Set.singleton (NodeId nId)))
178 <$> selectNgramsDocId corpusId listId nt
179
180
181 selectNgramsDocId :: CorpusId
182 -> ListId
183 -> NgramsType
184 -> Cmd err [(Text, Int)]
185 selectNgramsDocId corpusId' listId' ngramsType' =
186 runPGSQuery selectQuery (corpusId', listId', ngramsTypeId ngramsType')
187 where
188 selectQuery = [sql| SELECT ng.terms,nnng.node2_id from ngrams ng
189 JOIN node_node_ngrams nnng ON nnng.ngrams_id = ng.id
190 JOIN nodes_nodes nn ON nn.node2_id = nnng.node2_id
191
192 WHERE nn.node1_id = ?
193 AND nnng.node1_id = ?
194 AND nnng.ngrams_type = ?
195 ;
196 |]