]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Flow/Pairing.hs
[Community] specs + database schema adapted to link doc/contact
[gargantext.git] / src / Gargantext / Database / Action / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 # Spécifications for pairing
11
12 database:
13
14 add NodeType Community (instead of texts, contacts)
15
16 nodes_nodes
17 corpusId_communitId
18
19 get defaultList Id of each (for now)
20
21 corpusId_docId
22 listId_ngramsId (authors)
23
24 listId_docId_ngramsId
25 listId_contactId_ngramsId'
26
27 if isSame ngramsId ngramsId'
28 then
29 insert listId_docId_contactId
30 else
31 nothing
32
33
34 -}
35
36 {-# LANGUAGE QuasiQuotes #-}
37 -- {-# LANGUAGE Arrows #-}
38
39 module Gargantext.Database.Action.Flow.Pairing
40 (pairing)
41 where
42
43 import Control.Lens (_Just, (^.))
44 import Data.Map (Map, fromList)
45 import Data.Maybe (catMaybes)
46 import Data.Text (Text, toLower)
47 import Database.PostgreSQL.Simple.SqlQQ (sql)
48 import Gargantext.Core.Types (TableResult(..))
49 import Gargantext.Database.Action.Flow.Utils
50 import Gargantext.Database.Admin.Types.Node (AnnuaireId, CorpusId, ListId{-, DocId, ContactId-})
51 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
52 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
53 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
54 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
55 import Gargantext.Prelude hiding (sum)
56 import Safe (lastMay)
57 import qualified Data.Map as DM
58 import qualified Data.Text as DT
59
60 -- TODO mv this type in Types Main
61 type Terms = Text
62
63 {-
64 pairing'' :: (CorpusId, CorpusId) -> (DocId -> DocId)
65 pairing'' = undefined
66
67 pairing' :: (CorpusId, AnnuaireId) -> (DocId -> ContactId)
68 pairing' = undefined
69 -}
70
71 -- | TODO : add paring policy as parameter
72 pairing :: CorpusId -- (CorpusId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
73 -> AnnuaireId -- (AnnuaireId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
74 -> ListId
75 -> Cmd err Int
76 pairing cId aId lId = do
77 contacts' <- getAllContacts aId
78 let contactsMap = pairingPolicyToMap toLower
79 $ toMaps extractNgramsT (tr_docs contacts')
80
81 ngramsMap' <- getNgramsTindexed cId Authors
82 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
83
84 let indexedNgrams = pairMaps contactsMap ngramsMap
85
86 insertDocNgrams lId indexedNgrams
87
88 lastName :: Terms -> Terms
89 lastName texte = DT.toLower
90 $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
91 where
92 lastName' = lastMay . DT.splitOn " "
93
94 -- TODO: this method is dangerous (maybe equalities of the result are
95 -- not taken into account emergency demo plan...)
96 pairingPolicyToMap :: (Terms -> Terms)
97 -> Map (NgramsT Ngrams) a
98 -> Map (NgramsT Ngrams) a
99 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
100
101 pairingPolicy :: (Terms -> Terms)
102 -> NgramsT Ngrams
103 -> NgramsT Ngrams
104 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
105
106 -- | TODO : use Occurrences in place of Int
107 extractNgramsT :: HyperdataContact
108 -> Map (NgramsT Ngrams) Int
109 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
110 where
111 authors = map text2ngrams
112 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
113
114
115 pairMaps :: Map (NgramsT Ngrams) a
116 -> Map (NgramsT Ngrams) NgramsId
117 -> Map NgramsIndexed (Map NgramsType a)
118 pairMaps m1 m2 =
119 DM.fromList
120 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
121 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
122 , Just nId <- [DM.lookup k m2]
123 ]
124
125 -----------------------------------------------------------------------
126 getNgramsTindexed :: CorpusId
127 -> NgramsType
128 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
129 getNgramsTindexed corpusId ngramsType' = fromList
130 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
131 <$> selectNgramsTindexed corpusId ngramsType'
132 where
133 selectNgramsTindexed :: CorpusId
134 -> NgramsType
135 -> Cmd err [(NgramsId, Terms, Int)]
136 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
137 where
138 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
139 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
140 -- JOIN node_node_ngrams2 occ ON occ.ngrams_id = n.id
141 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
142
143 WHERE nn.node1_id = ?
144 AND occ.ngrams_type = ?
145 AND occ.node2_id = nn.node2_id
146 GROUP BY n.id;
147 |]
148
149 {- | TODO more typed SQL queries
150 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
151 selectNgramsTindexed corpusId ngramsType = proc () -> do
152 nodeNode <- queryNodeNodeTable -< ()
153 nodeNgrams <- queryNodesNgramsTable -< ()
154 ngrams <- queryNgramsTable -< ()
155
156 restrict -< node1_id nodeNode .== pgInt4 corpusId
157 restrict -< node2_id nodeNode .== node_id nodeNgrams
158 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
159
160 result <- aggregate groupBy (ngrams_id ngrams)
161 returnA -< result
162 --}