]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Flow/Pairing.hs
Merge branch 'dev-refactor-metrics' of ssh://gitlab.iscpif.fr:20022/gargantext/haskel...
[gargantext.git] / src / Gargantext / Database / Action / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE Arrows #-}
14
15 module Gargantext.Database.Action.Flow.Pairing
16 -- (pairing)
17 where
18
19 import Control.Lens (_Just, (^.))
20 import Data.HashMap.Strict (HashMap)
21 import qualified Data.HashMap.Strict as HM
22 import Data.Maybe (catMaybes, fromMaybe)
23 import Data.Set (Set)
24 import Gargantext.API.Ngrams.Tools
25 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
26 import Gargantext.API.Prelude (GargNoServer)
27 import Gargantext.Core.Types (TableResult(..))
28 import Gargantext.Core.Types.Main
29 import Gargantext.Database
30 import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
31 import Gargantext.Database.Admin.Config
32 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
33 import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
34 import Gargantext.Database.Query.Prelude (leftJoin2, returnA, queryNodeNodeTable)
35 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
36 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
37 import Gargantext.Database.Query.Table.Node (defaultList)
38 import Gargantext.Database.Query.Table.NodeNode (insertNodeNode)
39 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
40 import Gargantext.Database.Schema.Node
41 import Gargantext.Prelude hiding (sum)
42 import Opaleye
43 import qualified Data.List as List
44 import qualified Data.Map as Map
45 import qualified Data.Set as Set
46 import qualified Data.Text as DT
47
48
49 -- | isPairedWith
50 -- All NodeAnnuaire paired with a Corpus of NodeId nId:
51 -- isPairedWith NodeAnnuaire corpusId
52 isPairedWith :: NodeId -> NodeType -> Cmd err [NodeId]
53 isPairedWith nId nt = runOpaQuery (selectQuery nt nId)
54 where
55 selectQuery :: NodeType -> NodeId -> Query (Column PGInt4)
56 selectQuery nt' nId' = proc () -> do
57 (node, node_node) <- queryJoin -< ()
58 restrict -< (node^.node_typename) .== (pgInt4 $ nodeTypeId nt')
59 restrict -< (node_node^.nn_node1_id) .== (toNullable $ pgNodeId nId')
60 returnA -< node^.node_id
61
62 queryJoin :: Query (NodeRead, NodeNodeReadNull)
63 queryJoin = leftJoin2 queryNodeTable queryNodeNodeTable cond
64 where
65 cond (node, node_node) = node^.node_id .== node_node^. nn_node2_id
66
67 -----------------------------------------------------------------------
68 pairing :: AnnuaireId -> CorpusId -> Maybe ListId -> GargNoServer Int
69 pairing a c l' = do
70 l <- case l' of
71 Nothing -> defaultList c
72 Just l'' -> pure l''
73 dataPaired <- dataPairing a (c,l,Authors) takeName takeName
74 r <- insertDB $ prepareInsert dataPaired
75 _ <- insertNodeNode [ NodeNode c a Nothing Nothing]
76 pure r
77
78
79 dataPairing :: AnnuaireId
80 -> (CorpusId, ListId, NgramsType)
81 -> (ContactName -> Projected)
82 -> (DocAuthor -> Projected)
83 -> GargNoServer (HashMap ContactId (Set DocId))
84 dataPairing aId (cId, lId, ngt) fc fa = do
85 mc <- getNgramsContactId aId
86 md <- getNgramsDocId cId lId ngt
87
88 printDebug "ngramsContactId" mc
89 printDebug "ngramsDocId" md
90 let
91 from = projectionFrom (Set.fromList $ HM.keys mc) fc
92 to = projectionTo (Set.fromList $ HM.keys md) fa
93
94 pure $ fusion mc $ align from to md
95
96
97
98 prepareInsert :: HashMap ContactId (Set DocId) -> [NodeNode]
99 prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
100 $ List.concat
101 $ map (\(contactId, setDocIds)
102 -> map (\setDocId
103 -> (contactId, setDocId)
104 ) $ Set.toList setDocIds
105 )
106 $ HM.toList m
107
108 ------------------------------------------------------------------------
109 type ContactName = NgramsTerm
110 type DocAuthor = NgramsTerm
111 type Projected = NgramsTerm
112
113 projectionFrom :: Set ContactName -> (ContactName -> Projected) -> HashMap ContactName Projected
114 projectionFrom ss f = HM.fromList $ map (\s -> (s, f s)) (Set.toList ss) -- use HS.toMap
115
116 projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> HashMap Projected (Set DocAuthor)
117 projectionTo ss f = HM.fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss) -- use HS.toMap
118 ------------------------------------------------------------------------
119 takeName :: NgramsTerm -> NgramsTerm
120 takeName (NgramsTerm texte) = NgramsTerm $ DT.toLower texte'
121 where
122 texte' = maybe texte (\x -> if DT.length x > 3 then x else texte)
123 (lastName' texte)
124 lastName' = lastMay . DT.splitOn " "
125
126
127 ------------------------------------------------------------------------
128 align :: HashMap ContactName Projected
129 -> HashMap Projected (Set DocAuthor)
130 -> HashMap DocAuthor (Set DocId)
131 -> HashMap ContactName (Set DocId)
132 align mc ma md = HM.fromListWith (<>)
133 $ map (\c -> (c, getProjection md $ testProjection c mc ma))
134 $ HM.keys mc
135 where
136 getProjection :: HashMap DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
137 getProjection ma' sa' =
138 if Set.null sa'
139 then Set.empty
140 else Set.unions $ sets ma' sa'
141 where
142 sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
143 lookup s' ma''= fromMaybe Set.empty (HM.lookup s' ma'')
144
145 testProjection :: ContactName
146 -> HashMap ContactName Projected
147 -> HashMap Projected (Set DocAuthor)
148 -> Set DocAuthor
149 testProjection cn' mc' ma' = case HM.lookup cn' mc' of
150 Nothing -> Set.empty
151 Just c -> case HM.lookup c ma' of
152 Nothing -> Set.empty
153 Just a -> a
154
155 fusion :: HashMap ContactName (Set ContactId)
156 -> HashMap ContactName (Set DocId)
157 -> HashMap ContactId (Set DocId)
158 fusion mc md = HM.fromListWith (<>)
159 $ catMaybes
160 $ [ (,) <$> Just cId <*> HM.lookup cn md
161 | (cn, setContactId) <- HM.toList mc
162 , cId <- Set.toList setContactId
163 ]
164 ------------------------------------------------------------------------
165
166 getNgramsContactId :: AnnuaireId
167 -> Cmd err (HashMap ContactName (Set NodeId))
168 getNgramsContactId aId = do
169 contacts <- getAllContacts aId
170 pure $ HM.fromListWith (<>)
171 $ catMaybes
172 $ map (\contact -> (,) <$> (NgramsTerm <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName))
173 <*> Just ( Set.singleton (contact^.node_id))
174 ) (tr_docs contacts)
175
176
177 getNgramsDocId :: CorpusId
178 -> ListId
179 -> NgramsType
180 -> GargNoServer (HashMap DocAuthor (Set NodeId))
181 getNgramsDocId cId lId nt = do
182 repo <- getRepo
183 lIds <- selectNodesWithUsername NodeList userMaster
184 let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
185
186 groupNodesByNgrams ngs
187 <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)