]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Flow/Pairing.hs
[TableResult] refactor to use TableResult type
[gargantext.git] / src / Gargantext / Database / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16 -- {-# LANGUAGE Arrows #-}
17
18 module Gargantext.Database.Flow.Pairing
19 (pairing)
20 where
21
22 --import Debug.Trace (trace)
23 import Control.Lens (_Just, (^.))
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 -- import Opaleye
26 -- import Opaleye.Aggregate
27 -- import Control.Arrow (returnA)
28 import Data.Maybe (catMaybes)
29 import Data.Map (Map, fromList)
30 import Safe (lastMay)
31 import qualified Data.Map as DM
32 import Data.Text (Text, toLower)
33 import qualified Data.Text as DT
34 import Gargantext.Prelude hiding (sum)
35 import Gargantext.Core.Types (TableResult(..))
36 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
37 import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
38 import Gargantext.Database.Flow.Utils
39 import Gargantext.Database.Utils (Cmd, runPGSQuery)
40 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId)
41 import Gargantext.Database.Node.Children (getAllContacts)
42
43 -- TODO mv this type in Types Main
44 type Terms = Text
45
46 -- | TODO : add paring policy as parameter
47 pairing :: AnnuaireId
48 -> CorpusId
49 -> ListId
50 -> Cmd err Int
51 pairing aId cId lId = do
52 contacts' <- getAllContacts aId
53 let contactsMap = pairingPolicyToMap toLower
54 $ toMaps extractNgramsT (tr_docs contacts')
55
56 ngramsMap' <- getNgramsTindexed cId Authors
57 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
58
59 let indexedNgrams = pairMaps contactsMap ngramsMap
60
61 insertDocNgrams lId indexedNgrams
62
63 lastName :: Terms -> Terms
64 lastName texte = DT.toLower
65 $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
66 where
67 lastName' = lastMay . DT.splitOn " "
68
69 -- TODO: this method is dangerous (maybe equalities of the result are not taken into account
70 -- emergency demo plan...)
71 pairingPolicyToMap :: (Terms -> Terms)
72 -> Map (NgramsT Ngrams) a
73 -> Map (NgramsT Ngrams) a
74 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
75
76 pairingPolicy :: (Terms -> Terms)
77 -> NgramsT Ngrams
78 -> NgramsT Ngrams
79 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
80
81 -- | TODO : use Occurrences in place of Int
82 extractNgramsT :: HyperdataContact
83 -> Map (NgramsT Ngrams) Int
84 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
85 where
86 authors = map text2ngrams
87 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
88
89
90 pairMaps :: Map (NgramsT Ngrams) a
91 -> Map (NgramsT Ngrams) NgramsId
92 -> Map NgramsIndexed (Map NgramsType a)
93 pairMaps m1 m2 =
94 DM.fromList
95 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
96 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
97 , Just nId <- [DM.lookup k m2]
98 ]
99
100 -----------------------------------------------------------------------
101 getNgramsTindexed :: CorpusId
102 -> NgramsType
103 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
104 getNgramsTindexed corpusId ngramsType' = fromList
105 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
106 <$> selectNgramsTindexed corpusId ngramsType'
107 where
108 selectNgramsTindexed :: CorpusId
109 -> NgramsType
110 -> Cmd err [(NgramsId, Terms, Int)]
111 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
112 where
113 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
114 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
115 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
116
117 WHERE nn.node1_id = ?
118 AND occ.ngrams_type = ?
119 AND occ.node2_id = nn.node2_id
120 GROUP BY n.id;
121 |]
122
123 {- | TODO more typed SQL queries
124 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
125 selectNgramsTindexed corpusId ngramsType = proc () -> do
126 nodeNode <- queryNodeNodeTable -< ()
127 nodeNgrams <- queryNodesNgramsTable -< ()
128 ngrams <- queryNgramsTable -< ()
129
130 restrict -< node1_id nodeNode .== pgInt4 corpusId
131 restrict -< node2_id nodeNode .== node_id nodeNgrams
132 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
133
134 result <- aggregate groupBy (ngrams_id ngrams)
135 returnA -< result
136 --}