]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Flow/Pairing.hs
Merge branch 'dev-default-extensions' into dev
[gargantext.git] / src / Gargantext / Database / Action / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 -- {-# LANGUAGE Arrows #-}
14
15 module Gargantext.Database.Action.Flow.Pairing
16 (pairing)
17 where
18
19 import Control.Lens (_Just, (^.))
20 import Data.Map (Map, fromList)
21 import Data.Maybe (catMaybes)
22 import Data.Text (Text, toLower)
23 import Database.PostgreSQL.Simple.SqlQQ (sql)
24 import Gargantext.Core.Types (TableResult(..))
25 import Gargantext.Database.Action.Flow.Utils
26 import Gargantext.Database.Admin.Types.Node (AnnuaireId, CorpusId, ListId{-, DocId, ContactId-})
27 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
28 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
29 import Gargantext.Database.Query.Table.Node.Contact -- (HyperdataContact(..))
30 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
31 import Gargantext.Prelude hiding (sum)
32 import Safe (lastMay)
33 import qualified Data.Map as DM
34 import qualified Data.Text as DT
35
36 -- TODO mv this type in Types Main
37 type Terms = Text
38
39 {-
40 pairing'' :: (CorpusId, CorpusId) -> (DocId -> DocId)
41 pairing'' = undefined
42
43 pairing' :: (CorpusId, AnnuaireId) -> (DocId -> ContactId)
44 pairing' = undefined
45 -}
46
47 -- | TODO : add paring policy as parameter
48 pairing :: CorpusId -- (CorpusId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
49 -> AnnuaireId -- (AnnuaireId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
50 -> ListId
51 -> Cmd err Int
52 pairing cId aId lId = do
53 contacts' <- getAllContacts aId
54 let contactsMap = pairingPolicyToMap toLower
55 $ toMaps extractNgramsT (tr_docs contacts')
56
57 ngramsMap' <- getNgramsTindexed cId Authors
58 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
59
60 let indexedNgrams = pairMaps contactsMap ngramsMap
61
62 insertDocNgrams lId indexedNgrams
63
64 lastName :: Terms -> Terms
65 lastName texte = DT.toLower
66 $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
67 where
68 lastName' = lastMay . DT.splitOn " "
69
70 -- TODO: this method is dangerous (maybe equalities of the result are
71 -- not taken into account emergency demo plan...)
72 pairingPolicyToMap :: (Terms -> Terms)
73 -> Map (NgramsT Ngrams) a
74 -> Map (NgramsT Ngrams) a
75 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
76
77 pairingPolicy :: (Terms -> Terms)
78 -> NgramsT Ngrams
79 -> NgramsT Ngrams
80 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
81
82 -- | TODO : use Occurrences in place of Int
83 extractNgramsT :: HyperdataContact
84 -> Map (NgramsT Ngrams) Int
85 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
86 where
87 authors = map text2ngrams
88 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
89
90
91 pairMaps :: Map (NgramsT Ngrams) a
92 -> Map (NgramsT Ngrams) NgramsId
93 -> Map NgramsIndexed (Map NgramsType a)
94 pairMaps m1 m2 =
95 DM.fromList
96 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
97 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
98 , Just nId <- [DM.lookup k m2]
99 ]
100
101 -----------------------------------------------------------------------
102 getNgramsTindexed :: CorpusId
103 -> NgramsType
104 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
105 getNgramsTindexed corpusId ngramsType' = fromList
106 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
107 <$> selectNgramsTindexed corpusId ngramsType'
108 where
109 selectNgramsTindexed :: CorpusId
110 -> NgramsType
111 -> Cmd err [(NgramsId, Terms, Int)]
112 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
113 where
114 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
115 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
116 -- JOIN node_node_ngrams2 occ ON occ.ngrams_id = n.id
117 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
118
119 WHERE nn.node1_id = ?
120 AND occ.ngrams_type = ?
121 AND occ.node2_id = nn.node2_id
122 GROUP BY n.id;
123 |]
124
125 {- | TODO more typed SQL queries
126 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
127 selectNgramsTindexed corpusId ngramsType = proc () -> do
128 nodeNode <- queryNodeNodeTable -< ()
129 nodeNgrams <- queryNodesNgramsTable -< ()
130 ngrams <- queryNgramsTable -< ()
131
132 restrict -< node1_id nodeNode .== pgInt4 corpusId
133 restrict -< node2_id nodeNode .== node_id nodeNgrams
134 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
135
136 result <- aggregate groupBy (ngrams_id ngrams)
137 returnA -< result
138 --}