]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[DB][FLOW] fix duplicate ngrams insertion.
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 -- get data of NgramsTable
14 -- post :: update NodeNodeNgrams
15 -- group ngrams
16
17 get ngrams filtered by NgramsType
18 add get
19
20 -}
21
22 {-# LANGUAGE DataKinds #-}
23 {-# LANGUAGE DeriveGeneric #-}
24 {-# LANGUAGE NoImplicitPrelude #-}
25 {-# LANGUAGE OverloadedStrings #-}
26 {-# LANGUAGE ScopedTypeVariables #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE TypeOperators #-}
29 {-# LANGUAGE FlexibleInstances #-}
30 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
31 {-# OPTIONS -fno-warn-orphans #-}
32
33 module Gargantext.API.Ngrams
34 where
35
36 import Prelude (round)
37 -- import Gargantext.Database.User (UserId)
38 import Data.Patch.Class (Replace, replace)
39 --import qualified Data.Map.Strict.Patch as PM
40 import Data.Monoid
41 --import Data.Semigroup
42 import Data.Set (Set)
43 import qualified Data.Set as Set
44 --import Data.Maybe (catMaybes)
45 --import qualified Data.Map.Strict as DM
46 --import qualified Data.Set as Set
47 import Control.Lens (view, (.~))
48 import Data.Aeson
49 import Data.Aeson.TH (deriveJSON)
50 import Data.Either(Either(Left))
51 import Data.Map (lookup)
52 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
53 import Data.Swagger
54 import Data.Text (Text)
55 import Database.PostgreSQL.Simple (Connection)
56 import GHC.Generics (Generic)
57 import Gargantext.Core.Types (node_id)
58 --import Gargantext.Core.Types.Main (Tree(..))
59 import Gargantext.Core.Utils.Prefix (unPrefix)
60 import Gargantext.Database.Types.Node (NodeType(..))
61 import Gargantext.Database.Node (getListsWithParentId)
62 import qualified Gargantext.Database.Ngrams as Ngrams
63 import Gargantext.Prelude
64 import Gargantext.Core.Types (ListType(..), ListId)
65 import Prelude (Enum, Bounded, minBound, maxBound)
66 import Servant hiding (Patch)
67 import Test.QuickCheck (elements)
68 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
69
70 ------------------------------------------------------------------------
71 --data FacetFormat = Table | Chart
72 data TabType = Docs | Terms | Sources | Authors | Institutes | Trash
73 deriving (Generic, Enum, Bounded)
74
75 instance FromHttpApiData TabType
76 where
77 parseUrlPiece "Docs" = pure Docs
78 parseUrlPiece "Terms" = pure Terms
79 parseUrlPiece "Sources" = pure Sources
80 parseUrlPiece "Institutes" = pure Institutes
81 parseUrlPiece "Authors" = pure Authors
82 parseUrlPiece "Trash" = pure Trash
83 parseUrlPiece _ = Left "Unexpected value of TabType"
84
85 instance ToParamSchema TabType
86 instance ToJSON TabType
87 instance FromJSON TabType
88 instance ToSchema TabType
89 instance Arbitrary TabType
90 where
91 arbitrary = elements [minBound .. maxBound]
92
93 ------------------------------------------------------------------------
94 type NgramsTerm = Text
95
96 data NgramsElement =
97 NgramsElement { _ne_ngrams :: NgramsTerm
98 , _ne_list :: ListType
99 , _ne_occurrences :: Int
100 , _ne_parent :: Maybe NgramsTerm
101 , _ne_children :: Set NgramsTerm
102 }
103 deriving (Ord, Eq, Show, Generic)
104 $(deriveJSON (unPrefix "_ne_") ''NgramsElement)
105
106 instance ToSchema NgramsElement
107 instance Arbitrary NgramsElement where
108 arbitrary = elements [NgramsElement "sport" GraphList 1 Nothing mempty]
109
110 ------------------------------------------------------------------------
111 newtype NgramsTable = NgramsTable { _ngramsTable :: [NgramsElement] }
112 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
113
114 instance Arbitrary NgramsTable where
115 arbitrary = elements
116 [ NgramsTable
117 [ NgramsElement "animal" GraphList 1 Nothing (Set.fromList ["dog", "cat"])
118 , NgramsElement "cat" GraphList 1 (Just "animal") mempty
119 , NgramsElement "cats" StopList 4 Nothing mempty
120 , NgramsElement "dog" GraphList 3 (Just "animal")(Set.fromList ["dogs"])
121 , NgramsElement "dogs" StopList 4 (Just "dog") mempty
122 , NgramsElement "fox" GraphList 1 Nothing mempty
123 , NgramsElement "object" CandidateList 2 Nothing mempty
124 , NgramsElement "nothing" StopList 4 Nothing mempty
125 , NgramsElement "organic" GraphList 3 Nothing (Set.singleton "flower")
126 , NgramsElement "flower" GraphList 3 (Just "organic") mempty
127 , NgramsElement "moon" CandidateList 1 Nothing mempty
128 , NgramsElement "sky" StopList 1 Nothing mempty
129 ]
130 ]
131 instance ToSchema NgramsTable
132
133 ------------------------------------------------------------------------
134 -- On the Client side:
135 --data Action = InGroup NgramsId NgramsId
136 -- | OutGroup NgramsId NgramsId
137 -- | SetListType NgramsId ListType
138
139 data PatchSet a = PatchSet
140 { _rem :: Set a
141 , _add :: Set a
142 }
143 deriving (Eq, Ord, Show, Generic)
144
145 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
146 arbitrary = PatchSet <$> arbitrary <*> arbitrary
147
148 instance ToJSON a => ToJSON (PatchSet a) where
149 toJSON = genericToJSON $ unPrefix "_"
150 toEncoding = genericToEncoding $ unPrefix "_"
151
152 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
153 parseJSON = genericParseJSON $ unPrefix "_"
154
155 instance ToSchema a => ToSchema (PatchSet a)
156
157 instance ToSchema a => ToSchema (Replace a) where
158 declareNamedSchema (_ :: proxy (Replace a)) = do
159 aSchema <- declareSchemaRef (Proxy :: Proxy a)
160 return $ NamedSchema (Just "Replace") $ mempty
161 & type_ .~ SwaggerObject
162 & properties .~
163 InsOrdHashMap.fromList
164 [ ("old", aSchema)
165 , ("new", aSchema)
166 ]
167 & required .~ [ "old", "new" ]
168
169 data NgramsPatch =
170 NgramsPatch { _patch_children :: PatchSet NgramsElement
171 , _patch_list :: Replace ListType -- TODO Map UserId ListType
172 }
173 deriving (Ord, Eq, Show, Generic)
174 $(deriveJSON (unPrefix "_") ''NgramsPatch)
175
176 -- instance Semigroup NgramsPatch where
177
178 instance ToSchema NgramsPatch
179
180 instance Arbitrary NgramsPatch where
181 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
182
183 data NgramsIdPatch =
184 NgramsIdPatch { _nip_ngrams :: NgramsTerm
185 , _nip_ngramsPatch :: NgramsPatch
186 }
187 deriving (Ord, Eq, Show, Generic)
188 $(deriveJSON (unPrefix "_nip_") ''NgramsIdPatch)
189
190 instance ToSchema NgramsIdPatch
191
192 instance Arbitrary NgramsIdPatch where
193 arbitrary = NgramsIdPatch <$> arbitrary <*> arbitrary
194
195 --
196 -- TODO:
197 -- * This should be a Map NgramsId NgramsPatch
198 -- * Patchs -> Patches
199 newtype NgramsIdPatchs =
200 NgramsIdPatchs { _nip_ngramsIdPatchs :: [NgramsIdPatch] }
201 deriving (Ord, Eq, Show, Generic, Arbitrary)
202 $(deriveJSON (unPrefix "_nip_") ''NgramsIdPatchs)
203 instance ToSchema NgramsIdPatchs
204
205 ------------------------------------------------------------------------
206 ------------------------------------------------------------------------
207 type Version = Int
208
209 data Versioned a = Versioned
210 { _v_version :: Version
211 , _v_data :: a
212 }
213
214 {-
215 -- TODO sequencs of modifications (Patchs)
216 type NgramsIdPatch = Patch NgramsId NgramsPatch
217
218 ngramsPatch :: Int -> NgramsPatch
219 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopList)]) (Set.fromList [n]) Set.empty
220
221 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
222 toEdit n p = Edit n p
223 ngramsIdPatch :: Patch NgramsId NgramsPatch
224 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
225 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
226 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
227 ]
228
229 -- applyPatchBack :: Patch -> IO Patch
230 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
231 -}
232 ------------------------------------------------------------------------
233 ------------------------------------------------------------------------
234 ------------------------------------------------------------------------
235 type CorpusId = Int
236
237 type TableNgramsApiGet = Summary " Table Ngrams API Get"
238 :> QueryParam "ngramsType" TabType
239 :> QueryParam "list" ListId
240 :> Get '[JSON] NgramsTable
241
242 type TableNgramsApi = Summary " Table Ngrams API Change"
243 :> QueryParam "list" ListId
244 :> ReqBody '[JSON] NgramsIdPatchsFeed -- Versioned ...
245 :> Put '[JSON] NgramsIdPatchsBack -- Versioned ...
246
247 type NgramsIdPatchsFeed = NgramsIdPatchs
248 type NgramsIdPatchsBack = NgramsIdPatchs
249
250
251 defaultList :: Connection -> CorpusId -> IO ListId
252 defaultList c cId = view node_id <$> maybe (panic noListFound) identity
253 <$> head
254 <$> getListsWithParentId c cId
255 where
256 noListFound = "Gargantext.API.Ngrams.defaultList: no list found"
257
258 {-
259 toLists :: ListId -> NgramsIdPatchs -> [(ListId, NgramsId, ListTypeId)]
260 -- toLists = undefined
261 toLists lId np = [ (lId,ngId,listTypeId lt) | map (toList lId) (_nip_ngramsIdPatchs np) ]
262
263 toList :: ListId -> NgramsIdPatch -> (ListId, NgramsId, ListTypeId)
264 toList = undefined
265
266 toGroups :: ListId -> (NgramsPatch -> Set NgramsId) -> NgramsIdPatchs -> [NodeNgramsNgrams]
267 toGroups lId addOrRem ps = concat $ map (toGroup lId addOrRem) $ _nip_ngramsIdPatchs ps
268
269 toGroup :: ListId -> (NgramsPatch -> Set NgramsId) -> NgramsIdPatch -> [NodeNgramsNgrams]
270 -- toGroup = undefined
271 toGroup lId addOrRem (NgramsIdPatch ngId patch) =
272 map (\ng -> (NodeNgramsNgrams lId ngId ng (Just 1))) (Set.toList $ addOrRem patch)
273
274 -}
275
276 tableNgramsPatch :: Connection -> CorpusId -> Maybe ListId -> NgramsIdPatchsFeed -> IO NgramsIdPatchsBack
277 tableNgramsPatch = undefined
278 {-
279 tableNgramsPatch conn corpusId maybeList patchs = do
280 listId <- case maybeList of
281 Nothing -> defaultList conn corpusId
282 Just listId' -> pure listId'
283 _ <- ngramsGroup' conn Add $ toGroups listId _np_add_children patchs
284 _ <- ngramsGroup' conn Del $ toGroups listId _np_rem_children patchs
285 _ <- updateNodeNgrams conn (toLists listId patchs)
286 pure (NgramsIdPatchs [])
287 -}
288
289 -- | TODO Errors management
290 -- TODO: polymorphic for Annuaire or Corpus or ...
291 getTableNgrams :: Connection -> CorpusId -> Maybe TabType -> Maybe ListId -> IO NgramsTable
292 getTableNgrams c cId maybeTabType maybeListId = do
293 let lieu = "Garg.API.Ngrams: " :: Text
294 let ngramsType = case maybeTabType of
295 Nothing -> Ngrams.Sources -- panic (lieu <> "Indicate the Table")
296 Just tab -> case tab of
297 Sources -> Ngrams.Sources
298 Authors -> Ngrams.Authors
299 Institutes -> Ngrams.Institutes
300 Terms -> Ngrams.NgramsTerms
301 _ -> panic $ lieu <> "No Ngrams for this tab"
302
303 listId <- case maybeListId of
304 Nothing -> defaultList c cId
305 Just lId -> pure lId
306
307 (ngramsTableDatas, mapToParent, mapToChildren) <-
308 Ngrams.getNgramsTableDb c NodeDocument ngramsType (Ngrams.NgramsTableParam listId cId)
309
310 printDebug "ngramsTableDatas" ngramsTableDatas
311
312 pure $ NgramsTable $ map (\(Ngrams.NgramsTableData ngs _ lt w) ->
313 NgramsElement ngs
314 (maybe (panic $ lieu <> "listType") identity lt)
315 (round w)
316 (lookup ngs mapToParent)
317 (maybe mempty identity $ lookup ngs mapToChildren)
318 ) ngramsTableDatas
319
320