]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Flow/Pairing.hs
ServantJob, addToCorpus
[gargantext.git] / src / Gargantext / Database / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16 -- {-# LANGUAGE Arrows #-}
17
18 module Gargantext.Database.Flow.Pairing
19 (pairing)
20 where
21
22 --import Debug.Trace (trace)
23 import Control.Lens (_Just,view)
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 -- import Opaleye
26 -- import Opaleye.Aggregate
27 -- import Control.Arrow (returnA)
28 import Data.Maybe (catMaybes)
29 import Data.Map (Map, fromList)
30 import Safe (lastMay)
31 import qualified Data.Map as DM
32 import Data.Text (Text, toLower)
33 import qualified Data.Text as DT
34 import Gargantext.Prelude hiding (sum)
35 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
36 --import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
37 --import Gargantext.Database.Types.Node -- (Hyperdata(..))
38 import Gargantext.Database.Node.Contact
39 import Gargantext.Database.Flow.Utils
40 import Gargantext.Database.Utils (Cmd, runPGSQuery)
41 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId)
42 import Gargantext.Database.Node.Children
43 import Gargantext.Core.Types (NodeType(..))
44
45 -- TODO mv this type in Types Main
46 type Terms = Text
47
48 -- | TODO : add paring policy as parameter
49 pairing :: AnnuaireId -> CorpusId -> Cmd err Int
50 pairing aId cId = do
51 contacts' <- getContacts aId (Just NodeContact)
52 let contactsMap = pairingPolicyToMap toLower $ toMaps extractNgramsT contacts'
53
54 ngramsMap' <- getNgramsTindexed cId Authors
55 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
56
57 let indexedNgrams = pairMaps contactsMap ngramsMap
58
59 insertToNodeNgrams indexedNgrams
60 -- TODO add List
61
62 lastName :: Terms -> Terms
63 lastName texte = DT.toLower $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
64 where
65 lastName' = lastMay . DT.splitOn " "
66
67 -- TODO: this methods is dangerous (maybe equalities of the result are not taken into account
68 -- emergency demo plan...
69 pairingPolicyToMap :: (Terms -> Terms)
70 -> Map (NgramsT Ngrams) a -> Map (NgramsT Ngrams) a
71 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
72
73 pairingPolicy :: (Terms -> Terms) -> NgramsT Ngrams -> NgramsT Ngrams
74 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
75
76 -- | TODO : use Occurrences in place of Int
77 extractNgramsT :: HyperdataContact -> Map (NgramsT Ngrams) Int
78 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
79 where
80 authors = map text2ngrams $ catMaybes [view (hc_who . _Just . cw_lastName) contact]
81 --}
82
83 -- NP: notice how this function is no longer specific to the ContactId type
84 pairMaps :: Map (NgramsT Ngrams) a
85 -> Map (NgramsT Ngrams) NgramsId
86 -> Map NgramsIndexed (Map NgramsType a)
87 pairMaps m1 m2 =
88 DM.fromList
89 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
90 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
91 , Just nId <- [DM.lookup k m2]
92 ]
93
94 -----------------------------------------------------------------------
95 getNgramsTindexed:: CorpusId -> NgramsType -> Cmd err (Map (NgramsT Ngrams) NgramsId)
96 getNgramsTindexed corpusId ngramsType' = fromList
97 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
98 <$> selectNgramsTindexed corpusId ngramsType'
99
100 selectNgramsTindexed :: CorpusId -> NgramsType -> Cmd err [(NgramsId, Terms, Int)]
101 selectNgramsTindexed corpusId ngramsType'' = runPGSQuery selectQuery (corpusId, ngramsTypeId ngramsType'')
102 where
103 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
104 JOIN nodes_ngrams occ ON occ.ngram_id = n.id
105 JOIN nodes_nodes nn ON nn.node2_id = occ.node_id
106
107 WHERE nn.node1_id = ?
108 AND occ.ngrams_type = ?
109 AND occ.node_id = nn.node2_id
110 GROUP BY n.id;
111 |]
112
113 {- | TODO more typed SQL queries
114 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
115 selectNgramsTindexed corpusId ngramsType = proc () -> do
116 nodeNode <- queryNodeNodeTable -< ()
117 nodeNgrams <- queryNodesNgramsTable -< ()
118 ngrams <- queryNgramsTable -< ()
119
120 restrict -< node1_id nodeNode .== pgInt4 corpusId
121 restrict -< node2_id nodeNode .== node_id nodeNgrams
122 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
123
124 result <- aggregate groupBy (ngrams_id ngrams)
125 returnA -< result
126 --}
127
128