]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Flow/Pairing.hs
Merge branch 'dev' into dev-corpora-from-write-nodes
[gargantext.git] / src / Gargantext / Database / Action / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE Arrows #-}
14
15 module Gargantext.Database.Action.Flow.Pairing
16 -- (pairing)
17 where
18
19 import Control.Lens (_Just, (^.))
20 import Data.HashMap.Strict (HashMap)
21 import qualified Data.HashMap.Strict as HM
22 import Data.Maybe (catMaybes, fromMaybe)
23 import Data.Set (Set)
24 import Gargantext.API.Ngrams.Tools
25 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
26 import Gargantext.API.Prelude (GargNoServer)
27 import Gargantext.Core
28 import Gargantext.Core.Types (TableResult(..))
29 import Gargantext.Core.Types.Main
30 import Gargantext.Database
31 import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
32 import Gargantext.Database.Admin.Config
33 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
34 import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
35 import Gargantext.Database.Query.Prelude (leftJoin2, returnA, queryNodeNodeTable)
36 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
37 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
38 import Gargantext.Database.Query.Table.Node (defaultList)
39 import Gargantext.Database.Query.Table.NodeNode (insertNodeNode)
40 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
41 import Gargantext.Database.Schema.Node
42 import Gargantext.Prelude hiding (sum)
43 import Opaleye
44 import qualified Data.HashMap.Strict as HashMap
45 import qualified Data.List as List
46 import qualified Data.Set as Set
47 import qualified Data.Text as DT
48
49 -- | isPairedWith
50 -- All NodeAnnuaire paired with a Corpus of NodeId nId:
51 -- isPairedWith NodeAnnuaire corpusId
52 isPairedWith :: NodeId -> NodeType -> Cmd err [NodeId]
53 isPairedWith nId nt = runOpaQuery (selectQuery nt nId)
54 where
55 selectQuery :: NodeType -> NodeId -> Query (Column PGInt4)
56 selectQuery nt' nId' = proc () -> do
57 (node, node_node) <- queryJoin -< ()
58 restrict -< (node^.node_typename) .== (sqlInt4 $ toDBid nt')
59 restrict -< (node_node^.nn_node1_id) .== (toNullable $ pgNodeId nId')
60 returnA -< node^.node_id
61
62 queryJoin :: Query (NodeRead, NodeNodeReadNull)
63 queryJoin = leftJoin2 queryNodeTable queryNodeNodeTable cond
64 where
65 cond (node, node_node) = node^.node_id .== node_node^. nn_node2_id
66
67 -----------------------------------------------------------------------
68 pairing :: AnnuaireId -> CorpusId -> Maybe ListId -> GargNoServer Int
69 pairing a c l' = do
70 l <- case l' of
71 Nothing -> defaultList c
72 Just l'' -> pure l''
73 dataPaired <- dataPairing a (c,l,Authors) takeName takeName
74 r <- insertDB $ prepareInsert dataPaired
75 _ <- insertNodeNode [ NodeNode { _nn_node1_id = c
76 , _nn_node2_id = a
77 , _nn_score = Nothing
78 , _nn_category = Nothing }]
79 pure r
80
81
82 dataPairing :: AnnuaireId
83 -> (CorpusId, ListId, NgramsType)
84 -> (ContactName -> Projected)
85 -> (DocAuthor -> Projected)
86 -> GargNoServer (HashMap ContactId (Set DocId))
87 dataPairing aId (cId, lId, ngt) fc fa = do
88 mc <- getNgramsContactId aId
89 md <- getNgramsDocId cId lId ngt
90
91 printDebug "ngramsContactId" mc
92 printDebug "ngramsDocId" md
93 let
94 from = projectionFrom (Set.fromList $ HM.keys mc) fc
95 to = projectionTo (Set.fromList $ HM.keys md) fa
96
97 pure $ fusion mc $ align from to md
98
99
100
101 prepareInsert :: HashMap ContactId (Set DocId) -> [NodeNode]
102 prepareInsert m = map (\(n1,n2) -> NodeNode { _nn_node1_id = n1
103 , _nn_node2_id = n2
104 , _nn_score = Nothing
105 , _nn_category = Nothing })
106 $ List.concat
107 $ map (\(contactId, setDocIds)
108 -> map (\setDocId
109 -> (contactId, setDocId)
110 ) $ Set.toList setDocIds
111 )
112 $ HM.toList m
113
114 ------------------------------------------------------------------------
115 type ContactName = NgramsTerm
116 type DocAuthor = NgramsTerm
117 type Projected = NgramsTerm
118
119 projectionFrom :: Set ContactName -> (ContactName -> Projected) -> HashMap ContactName Projected
120 projectionFrom ss f = HM.fromList $ map (\s -> (s, f s)) (Set.toList ss) -- use HS.toMap
121
122 projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> HashMap Projected (Set DocAuthor)
123 projectionTo ss f = HM.fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss) -- use HS.toMap
124 ------------------------------------------------------------------------
125 takeName :: NgramsTerm -> NgramsTerm
126 takeName (NgramsTerm texte) = NgramsTerm $ DT.toLower texte'
127 where
128 texte' = maybe texte (\x -> if DT.length x > 3 then x else texte)
129 (lastName' texte)
130 lastName' = lastMay . DT.splitOn " "
131
132
133 ------------------------------------------------------------------------
134 align :: HashMap ContactName Projected
135 -> HashMap Projected (Set DocAuthor)
136 -> HashMap DocAuthor (Set DocId)
137 -> HashMap ContactName (Set DocId)
138 align mc ma md = HM.fromListWith (<>)
139 $ map (\c -> (c, getProjection md $ testProjection c mc ma))
140 $ HM.keys mc
141 where
142 getProjection :: HashMap DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
143 getProjection ma' sa' =
144 if Set.null sa'
145 then Set.empty
146 else Set.unions $ sets ma' sa'
147 where
148 sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
149 lookup s' ma''= fromMaybe Set.empty (HM.lookup s' ma'')
150
151 testProjection :: ContactName
152 -> HashMap ContactName Projected
153 -> HashMap Projected (Set DocAuthor)
154 -> Set DocAuthor
155 testProjection cn' mc' ma' = case HM.lookup cn' mc' of
156 Nothing -> Set.empty
157 Just c -> case HM.lookup c ma' of
158 Nothing -> Set.empty
159 Just a -> a
160
161 fusion :: HashMap ContactName (Set ContactId)
162 -> HashMap ContactName (Set DocId)
163 -> HashMap ContactId (Set DocId)
164 fusion mc md = HM.fromListWith (<>)
165 $ catMaybes
166 $ [ (,) <$> Just cId <*> HM.lookup cn md
167 | (cn, setContactId) <- HM.toList mc
168 , cId <- Set.toList setContactId
169 ]
170 ------------------------------------------------------------------------
171
172 getNgramsContactId :: AnnuaireId
173 -> Cmd err (HashMap ContactName (Set NodeId))
174 getNgramsContactId aId = do
175 contacts <- getAllContacts aId
176 pure $ HM.fromListWith (<>)
177 $ catMaybes
178 $ map (\contact -> (,) <$> (NgramsTerm <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName))
179 <*> Just ( Set.singleton (contact^.node_id))
180 ) (tr_docs contacts)
181
182
183 getNgramsDocId :: CorpusId
184 -> ListId
185 -> NgramsType
186 -> GargNoServer (HashMap DocAuthor (Set NodeId))
187 getNgramsDocId cId lId nt = do
188 lIds <- selectNodesWithUsername NodeList userMaster
189 repo <- getRepo' lIds
190 let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
191
192 groupNodesByNgrams ngs
193 <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)