]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Flow/Pairing.hs
Merge branch 'dev-phylo' of ssh://gitlab.iscpif.fr:20022/gargantext/haskell-gargantex...
[gargantext.git] / src / Gargantext / Database / Action / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE Arrows #-}
14
15 module Gargantext.Database.Action.Flow.Pairing
16 -- (pairing)
17 where
18
19 import Control.Lens (_Just, (^.))
20 import Data.Map (Map, fromList, fromListWith)
21 import Data.Maybe (catMaybes, fromMaybe)
22 import Data.Set (Set)
23 import Data.Text (Text)
24 import Gargantext.API.Ngrams.Tools
25 import Gargantext.API.Prelude (GargNoServer)
26 import Gargantext.Core.Types (TableResult(..), Term)
27 import Gargantext.Core.Types.Main
28 import Gargantext.Database
29 import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
30 import Gargantext.Database.Admin.Config
31 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
32 import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
33 import Gargantext.Database.Query.Prelude (leftJoin2, returnA, queryNodeNodeTable)
34 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
35 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
36 import Gargantext.Database.Query.Table.Node (defaultList)
37 import Gargantext.Database.Query.Table.NodeNode (insertNodeNode)
38 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
39 import Gargantext.Database.Schema.Node
40 import Gargantext.Prelude hiding (sum)
41 import Opaleye
42 import qualified Data.List as List
43 import qualified Data.Map as Map
44 import qualified Data.Set as Set
45 import qualified Data.Text as DT
46
47
48 -- | isPairedWith
49 -- All NodeAnnuaire paired with a Corpus of NodeId nId:
50 -- isPairedWith NodeAnnuaire corpusId
51 isPairedWith :: NodeId -> NodeType -> Cmd err [NodeId]
52 isPairedWith nId nt = runOpaQuery (selectQuery nt nId)
53 where
54 selectQuery :: NodeType -> NodeId -> Query (Column PGInt4)
55 selectQuery nt' nId' = proc () -> do
56 (node, node_node) <- queryJoin -< ()
57 restrict -< (node^.node_typename) .== (pgInt4 $ nodeTypeId nt')
58 restrict -< (node_node^.nn_node1_id) .== (toNullable $ pgNodeId nId')
59 returnA -< node^.node_id
60
61 queryJoin :: Query (NodeRead, NodeNodeReadNull)
62 queryJoin = leftJoin2 queryNodeTable queryNodeNodeTable cond
63 where
64 cond (node, node_node) = node^.node_id .== node_node^. nn_node2_id
65
66 -----------------------------------------------------------------------
67 pairing :: AnnuaireId -> CorpusId -> Maybe ListId -> GargNoServer Int
68 pairing a c l' = do
69 l <- case l' of
70 Nothing -> defaultList c
71 Just l'' -> pure l''
72 dataPaired <- dataPairing a (c,l,Authors) takeName takeName
73 r <- insertDB $ prepareInsert dataPaired
74 _ <- insertNodeNode [ NodeNode c a Nothing Nothing]
75 pure r
76
77
78 dataPairing :: AnnuaireId
79 -> (CorpusId, ListId, NgramsType)
80 -> (ContactName -> Projected)
81 -> (DocAuthor -> Projected)
82 -> GargNoServer (Map ContactId (Set DocId))
83 dataPairing aId (cId, lId, ngt) fc fa = do
84 mc <- getNgramsContactId aId
85 md <- getNgramsDocId cId lId ngt
86
87 printDebug "ngramsContactId" mc
88 printDebug "ngramsDocId" md
89 let
90 from = projectionFrom (Set.fromList $ Map.keys mc) fc
91 to = projectionTo (Set.fromList $ Map.keys md) fa
92
93 pure $ fusion mc $ align from to md
94
95
96
97 prepareInsert :: Map ContactId (Set DocId) -> [NodeNode]
98 prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
99 $ List.concat
100 $ map (\(contactId, setDocIds)
101 -> map (\setDocId
102 -> (contactId, setDocId)
103 ) $ Set.toList setDocIds
104 )
105 $ Map.toList m
106
107 ------------------------------------------------------------------------
108 type ContactName = Text
109 type DocAuthor = Text
110 type Projected = Text
111
112 projectionFrom :: Set ContactName -> (ContactName -> Projected) -> Map ContactName Projected
113 projectionFrom ss f = fromList $ map (\s -> (s, f s)) (Set.toList ss)
114
115 projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> Map Projected (Set DocAuthor)
116 projectionTo ss f = fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss)
117 ------------------------------------------------------------------------
118 takeName :: Term -> Term
119 takeName texte = DT.toLower texte'
120 where
121 texte' = maybe texte (\x -> if DT.length x > 3 then x else texte)
122 (lastName' texte)
123 lastName' = lastMay . DT.splitOn " "
124
125
126 ------------------------------------------------------------------------
127 align :: Map ContactName Projected
128 -> Map Projected (Set DocAuthor)
129 -> Map DocAuthor (Set DocId)
130 -> Map ContactName (Set DocId)
131 align mc ma md = fromListWith (<>)
132 $ map (\c -> (c, getProjection md $ testProjection c mc ma))
133 $ Map.keys mc
134 where
135 getProjection :: Map DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
136 getProjection ma' sa' =
137 if Set.null sa'
138 then Set.empty
139 else Set.unions $ sets ma' sa'
140 where
141 sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
142 lookup s' ma''= fromMaybe Set.empty (Map.lookup s' ma'')
143
144 testProjection :: ContactName
145 -> Map ContactName Projected
146 -> Map Projected (Set DocAuthor)
147 -> Set DocAuthor
148 testProjection cn' mc' ma' = case Map.lookup cn' mc' of
149 Nothing -> Set.empty
150 Just c -> case Map.lookup c ma' of
151 Nothing -> Set.empty
152 Just a -> a
153
154 fusion :: Map ContactName (Set ContactId)
155 -> Map ContactName (Set DocId)
156 -> Map ContactId (Set DocId)
157 fusion mc md = Map.fromListWith (<>)
158 $ catMaybes
159 $ [ (,) <$> Just cId <*> Map.lookup cn md
160 | (cn, setContactId) <- Map.toList mc
161 , cId <- Set.toList setContactId
162 ]
163 ------------------------------------------------------------------------
164
165 getNgramsContactId :: AnnuaireId
166 -> Cmd err (Map ContactName (Set NodeId))
167 getNgramsContactId aId = do
168 contacts <- getAllContacts aId
169 pure $ fromListWith (<>)
170 $ catMaybes
171 $ map (\contact -> (,) <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName)
172 <*> Just ( Set.singleton (contact^.node_id))
173 ) (tr_docs contacts)
174
175
176 getNgramsDocId :: CorpusId
177 -> ListId
178 -> NgramsType
179 -> GargNoServer (Map DocAuthor (Set NodeId))
180 getNgramsDocId cId lId nt = do
181 repo <- getRepo
182 lIds <- selectNodesWithUsername NodeList userMaster
183 let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
184
185 groupNodesByNgrams ngs
186 <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)