]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[FIX] BUG limit on Nodes by Ngrams count.
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# LANGUAGE ConstraintKinds #-}
19 {-# LANGUAGE DataKinds #-}
20 {-# LANGUAGE DeriveGeneric #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE ScopedTypeVariables #-}
24 {-# LANGUAGE TemplateHaskell #-}
25 {-# LANGUAGE TypeOperators #-}
26 {-# LANGUAGE FlexibleContexts #-}
27 {-# LANGUAGE FlexibleInstances #-}
28 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
29 {-# LANGUAGE MultiParamTypeClasses #-}
30 {-# LANGUAGE RankNTypes #-}
31 {-# LANGUAGE TypeFamilies #-}
32 {-# OPTIONS -fno-warn-orphans #-}
33
34 module Gargantext.API.Ngrams
35 where
36
37 -- import Debug.Trace (trace)
38 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
39 -- import Gargantext.Database.Schema.User (UserId)
40 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..),
41 Composable(..), Transformable(..),
42 PairPatch(..), Patched, ConflictResolution,
43 ConflictResolutionReplace, ours)
44 import qualified Data.Map.Strict.Patch as PM
45 import Data.Monoid
46 import Data.Ord (Down(..))
47 import Data.Foldable
48 --import Data.Semigroup
49 import Data.Set (Set)
50 import qualified Data.Set as S
51 import qualified Data.List as List
52 import Data.Maybe (fromMaybe)
53 -- import Data.Tuple.Extra (first)
54 import qualified Data.Map.Strict as Map
55 import Data.Map.Strict (Map)
56 import qualified Data.Set as Set
57 import Control.Category ((>>>))
58 import Control.Concurrent
59 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_)
60 import Control.Monad.Error.Class (MonadError)
61 import Control.Monad.Reader
62 import Control.Monad.State
63 import Data.Aeson hiding ((.=))
64 import Data.Aeson.TH (deriveJSON)
65 import Data.Either(Either(Left))
66 -- import Data.Map (lookup)
67 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
68 import Data.Swagger hiding (version, patch)
69 import Data.Text (Text, isInfixOf, count)
70 import Data.Validity
71 import GHC.Generics (Generic)
72 import Gargantext.Core.Utils.Prefix (unPrefix)
73 -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
74 import Gargantext.Database.Config (userMaster)
75 import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlySlow)
76 import Gargantext.Database.Schema.Ngrams (NgramsType)
77 import Gargantext.Database.Types.Node (NodeType(..))
78 import Gargantext.Database.Utils (fromField', HasConnection)
79 import Gargantext.Database.Node.Select
80 import Gargantext.Database.Ngrams
81 --import Gargantext.Database.Lists (listsWith)
82 import Gargantext.Database.Schema.Node (HasNodeError)
83 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
84 import qualified Gargantext.Database.Schema.Ngrams as Ngrams
85 -- import Gargantext.Database.Schema.NodeNgram hiding (Action)
86 import Gargantext.Prelude
87 -- import Gargantext.Core.Types (ListTypeId, listTypeId)
88 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
89 import Servant hiding (Patch)
90 import System.FileLock (FileLock)
91 import Test.QuickCheck (elements)
92 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
93
94 data TODO = TODO
95 deriving (Generic)
96
97 instance ToSchema TODO where
98
99 ------------------------------------------------------------------------
100 --data FacetFormat = Table | Chart
101 data TabType = Docs | Terms | Sources | Authors | Institutes | Trash
102 | Contacts
103 deriving (Generic, Enum, Bounded)
104
105 instance FromHttpApiData TabType
106 where
107 parseUrlPiece "Docs" = pure Docs
108 parseUrlPiece "Terms" = pure Terms
109 parseUrlPiece "Sources" = pure Sources
110 parseUrlPiece "Institutes" = pure Institutes
111 parseUrlPiece "Authors" = pure Authors
112 parseUrlPiece "Trash" = pure Trash
113
114 parseUrlPiece "Contacts" = pure Contacts
115
116 parseUrlPiece _ = Left "Unexpected value of TabType"
117
118 instance ToParamSchema TabType
119 instance ToJSON TabType
120 instance FromJSON TabType
121 instance ToSchema TabType
122 instance Arbitrary TabType
123 where
124 arbitrary = elements [minBound .. maxBound]
125
126 newtype MSet a = MSet (Map a ())
127 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
128
129 instance ToJSON a => ToJSON (MSet a) where
130 toJSON (MSet m) = toJSON (Map.keys m)
131 toEncoding (MSet m) = toEncoding (Map.keys m)
132
133 mSetFromSet :: Set a -> MSet a
134 mSetFromSet = MSet . Map.fromSet (const ())
135
136 mSetFromList :: Ord a => [a] -> MSet a
137 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
138
139 -- mSetToSet :: Ord a => MSet a -> Set a
140 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
141 mSetToSet :: Ord a => MSet a -> Set a
142 mSetToSet = Set.fromList . mSetToList
143
144 mSetToList :: MSet a -> [a]
145 mSetToList (MSet a) = Map.keys a
146
147 instance Foldable MSet where
148 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
149
150 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
151 parseJSON = fmap mSetFromList . parseJSON
152
153 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
154 -- TODO
155 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
156
157 ------------------------------------------------------------------------
158 type NgramsTerm = Text
159
160 data RootParent = RootParent
161 { _rp_root :: NgramsTerm
162 , _rp_parent :: NgramsTerm
163 }
164 deriving (Ord, Eq, Show, Generic)
165
166 deriveJSON (unPrefix "_rp_") ''RootParent
167 makeLenses ''RootParent
168
169 data NgramsRepoElement = NgramsRepoElement
170 { _nre_size :: Int
171 , _nre_list :: ListType
172 --, _nre_root_parent :: Maybe RootParent
173 , _nre_root :: Maybe NgramsTerm
174 , _nre_parent :: Maybe NgramsTerm
175 , _nre_children :: MSet NgramsTerm
176 }
177 deriving (Ord, Eq, Show, Generic)
178
179 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
180 makeLenses ''NgramsRepoElement
181
182 data NgramsElement =
183 NgramsElement { _ne_ngrams :: NgramsTerm
184 , _ne_size :: Int
185 , _ne_list :: ListType
186 , _ne_occurrences :: Int
187 , _ne_root :: Maybe NgramsTerm
188 , _ne_parent :: Maybe NgramsTerm
189 , _ne_children :: MSet NgramsTerm
190 }
191 deriving (Ord, Eq, Show, Generic)
192
193 deriveJSON (unPrefix "_ne_") ''NgramsElement
194 makeLenses ''NgramsElement
195
196 mkNgramsElement :: NgramsTerm -> ListType -> Maybe RootParent -> MSet NgramsTerm -> NgramsElement
197 mkNgramsElement ngrams list rp children =
198 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
199 where
200 -- TODO review
201 size = 1 + count " " ngrams
202
203 instance ToSchema NgramsElement
204 instance Arbitrary NgramsElement where
205 arbitrary = elements [mkNgramsElement "sport" GraphTerm Nothing mempty]
206
207 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
208 ngramsElementToRepo
209 (NgramsElement { _ne_size = s
210 , _ne_list = l
211 , _ne_root = r
212 , _ne_parent = p
213 , _ne_children = c
214 }) =
215 NgramsRepoElement
216 { _nre_size = s
217 , _nre_list = l
218 , _nre_parent = p
219 , _nre_root = r
220 , _nre_children = c
221 }
222
223 ngramsElementFromRepo :: (NgramsTerm, NgramsRepoElement) -> NgramsElement
224 ngramsElementFromRepo
225 (ngrams,
226 NgramsRepoElement
227 { _nre_size = s
228 , _nre_list = l
229 , _nre_parent = p
230 , _nre_root = r
231 , _nre_children = c
232 }) =
233 NgramsElement { _ne_size = s
234 , _ne_list = l
235 , _ne_root = r
236 , _ne_parent = p
237 , _ne_children = c
238 , _ne_ngrams = ngrams
239 , _ne_occurrences = 0 -- panic "API.Ngrams._ne_occurrences"
240 {-
241 -- Here we could use 0 if we want to avoid any `panic`.
242 -- It will not happen using getTableNgrams if
243 -- getOccByNgramsOnly provides a count of occurrences for
244 -- all the ngrams given.
245 -}
246 }
247
248 ------------------------------------------------------------------------
249 newtype NgramsTable = NgramsTable [NgramsElement]
250 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
251
252 type ListNgrams = NgramsTable
253
254 makePrisms ''NgramsTable
255
256 -- | Question: why these repetition of Type in this instance
257 -- may you document it please ?
258 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
259 each = _NgramsTable . each
260
261 -- TODO discuss
262 -- | TODO Check N and Weight
263 {-
264 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
265 toNgramsElement ns = map toNgramsElement' ns
266 where
267 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
268 where
269 p' = case p of
270 Nothing -> Nothing
271 Just x -> lookup x mapParent
272 c' = maybe mempty identity $ lookup t mapChildren
273 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
274
275 mapParent :: Map Int Text
276 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
277
278 mapChildren :: Map Text (Set Text)
279 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
280 $ Map.fromListWith (<>)
281 $ map (first fromJust)
282 $ filter (isJust . fst)
283 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
284 -}
285
286 mockTable :: NgramsTable
287 mockTable = NgramsTable
288 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
289 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
290 , mkNgramsElement "cats" StopTerm Nothing mempty
291 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
292 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
293 , mkNgramsElement "fox" GraphTerm Nothing mempty
294 , mkNgramsElement "object" CandidateTerm Nothing mempty
295 , mkNgramsElement "nothing" StopTerm Nothing mempty
296 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
297 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
298 , mkNgramsElement "moon" CandidateTerm Nothing mempty
299 , mkNgramsElement "sky" StopTerm Nothing mempty
300 ]
301 where
302 rp n = Just $ RootParent n n
303
304 instance Arbitrary NgramsTable where
305 arbitrary = pure mockTable
306
307 instance ToSchema NgramsTable
308
309 ------------------------------------------------------------------------
310 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
311
312 ------------------------------------------------------------------------
313 -- On the Client side:
314 --data Action = InGroup NgramsId NgramsId
315 -- | OutGroup NgramsId NgramsId
316 -- | SetListType NgramsId ListType
317
318 data PatchSet a = PatchSet
319 { _rem :: Set a
320 , _add :: Set a
321 }
322 deriving (Eq, Ord, Show, Generic)
323
324 makeLenses ''PatchSet
325 makePrisms ''PatchSet
326
327 instance ToJSON a => ToJSON (PatchSet a) where
328 toJSON = genericToJSON $ unPrefix "_"
329 toEncoding = genericToEncoding $ unPrefix "_"
330
331 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
332 parseJSON = genericParseJSON $ unPrefix "_"
333
334 {-
335 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
336 arbitrary = PatchSet <$> arbitrary <*> arbitrary
337
338 type instance Patched (PatchSet a) = Set a
339
340 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
341 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
342
343 instance Ord a => Semigroup (PatchSet a) where
344 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
345 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
346 } -- TODO Review
347
348 instance Ord a => Monoid (PatchSet a) where
349 mempty = PatchSet mempty mempty
350
351 instance Ord a => Group (PatchSet a) where
352 invert (PatchSet r a) = PatchSet a r
353
354 instance Ord a => Composable (PatchSet a) where
355 composable _ _ = undefined
356
357 instance Ord a => Action (PatchSet a) (Set a) where
358 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
359
360 instance Applicable (PatchSet a) (Set a) where
361 applicable _ _ = mempty
362
363 instance Ord a => Validity (PatchSet a) where
364 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
365
366 instance Ord a => Transformable (PatchSet a) where
367 transformable = undefined
368
369 conflicts _p _q = undefined
370
371 transformWith conflict p q = undefined conflict p q
372
373 instance ToSchema a => ToSchema (PatchSet a)
374 -}
375
376 type AddRem = Replace (Maybe ())
377
378 remPatch, addPatch :: AddRem
379 remPatch = replace (Just ()) Nothing
380 addPatch = replace Nothing (Just ())
381
382 isRem :: Replace (Maybe ()) -> Bool
383 isRem = (== remPatch)
384
385 type PatchMap = PM.PatchMap
386
387 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
388 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
389 Transformable, Composable)
390
391 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
392 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
393
394 -- TODO this breaks module abstraction
395 makePrisms ''PM.PatchMap
396
397 makePrisms ''PatchMSet
398
399 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
400 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
401 where
402 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
403 f = Map.partition isRem >>> both %~ Map.keysSet
404
405 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
406 g (rems, adds) = Map.fromSet (const remPatch) rems
407 <> Map.fromSet (const addPatch) adds
408
409 instance Ord a => Action (PatchMSet a) (MSet a) where
410 act (PatchMSet p) (MSet m) = MSet $ act p m
411
412 instance Ord a => Applicable (PatchMSet a) (MSet a) where
413 applicable (PatchMSet p) (MSet m) = applicable p m
414
415 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
416 toJSON = toJSON . view _PatchMSetIso
417 toEncoding = toEncoding . view _PatchMSetIso
418
419 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
420 parseJSON = fmap (_PatchMSetIso #) . parseJSON
421
422 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
423 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
424
425 instance ToSchema a => ToSchema (PatchMSet a) where
426 -- TODO
427 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
428
429 type instance Patched (PatchMSet a) = MSet a
430
431 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
432 arbitrary = uncurry replace <$> arbitrary
433 -- If they happen to be equal then the patch is Keep.
434
435 instance ToSchema a => ToSchema (Replace a) where
436 declareNamedSchema (_ :: proxy (Replace a)) = do
437 -- TODO Keep constructor is not supported here.
438 aSchema <- declareSchemaRef (Proxy :: Proxy a)
439 return $ NamedSchema (Just "Replace") $ mempty
440 & type_ .~ SwaggerObject
441 & properties .~
442 InsOrdHashMap.fromList
443 [ ("old", aSchema)
444 , ("new", aSchema)
445 ]
446 & required .~ [ "old", "new" ]
447
448 data NgramsPatch =
449 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
450 , _patch_list :: Replace ListType -- TODO Map UserId ListType
451 }
452 deriving (Eq, Show, Generic)
453
454 deriveJSON (unPrefix "_") ''NgramsPatch
455 makeLenses ''NgramsPatch
456
457 instance ToSchema NgramsPatch
458
459 instance Arbitrary NgramsPatch where
460 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
461
462 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
463
464 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
465 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
466
467 instance Semigroup NgramsPatch where
468 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
469
470 instance Monoid NgramsPatch where
471 mempty = _NgramsPatch # mempty
472
473 instance Validity NgramsPatch where
474 validate p = p ^. _NgramsPatch . to validate
475
476 instance Transformable NgramsPatch where
477 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
478
479 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
480
481 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
482 where
483 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
484
485 type ConflictResolutionNgramsPatch =
486 ( ConflictResolutionPatchMSet NgramsTerm
487 , ConflictResolutionReplace ListType
488 )
489 type instance ConflictResolution NgramsPatch =
490 ConflictResolutionNgramsPatch
491
492 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
493 -- ~ Patched NgramsPatchIso
494 type instance Patched NgramsPatch = PatchedNgramsPatch
495
496 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
497 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
498 applicable p (Just nre) =
499 applicable (p ^. patch_children) (nre ^. nre_children) <>
500 applicable (p ^. patch_list) (nre ^. nre_list)
501
502 instance Action NgramsPatch NgramsRepoElement where
503 act p = (nre_children %~ act (p ^. patch_children))
504 . (nre_list %~ act (p ^. patch_list))
505
506 instance Action NgramsPatch (Maybe NgramsRepoElement) where
507 act = fmap . act
508
509 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
510 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
511
512 instance FromField NgramsTablePatch
513 where
514 fromField = fromField'
515
516 instance FromField (PatchMap NgramsType (PatchMap NodeId NgramsTablePatch))
517 where
518 fromField = fromField'
519
520 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
521 --
522 type instance ConflictResolution NgramsTablePatch =
523 NgramsTerm -> ConflictResolutionNgramsPatch
524
525 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
526 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
527 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
528
529 makePrisms ''NgramsTablePatch
530 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
531 instance ToSchema NgramsTablePatch
532
533 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
534 applicable p = applicable (p ^. _NgramsTablePatch)
535
536 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
537 act p =
538 fmap (execState (reParentNgramsTablePatch p)) .
539 act (p ^. _NgramsTablePatch)
540
541 instance Arbitrary NgramsTablePatch where
542 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
543
544 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
545 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
546 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
547
548 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
549
550 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
551 reRootChildren root ngram = do
552 nre <- use $ at ngram
553 forOf_ (_Just . nre_children . folded) nre $ \child -> do
554 at child . _Just . nre_root ?= root
555 reRootChildren root child
556
557 reParent :: Maybe RootParent -> ReParent NgramsTerm
558 reParent rp child = do
559 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
560 . (nre_root .~ (_rp_root <$> rp))
561 )
562 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
563
564 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
565 reParentAddRem rp child p =
566 reParent (if isRem p then Nothing else Just rp) child
567
568 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
569 reParentNgramsPatch parent ngramsPatch = do
570 root_of_parent <- use (at parent . _Just . nre_root)
571 let
572 root = fromMaybe parent root_of_parent
573 rp = RootParent { _rp_root = root, _rp_parent = parent }
574 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
575 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
576
577 reParentNgramsTablePatch :: ReParent NgramsTablePatch
578 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
579 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
580
581 ------------------------------------------------------------------------
582 ------------------------------------------------------------------------
583 type Version = Int
584
585 data Versioned a = Versioned
586 { _v_version :: Version
587 , _v_data :: a
588 }
589 deriving (Generic, Show)
590 deriveJSON (unPrefix "_v_") ''Versioned
591 makeLenses ''Versioned
592 instance ToSchema a => ToSchema (Versioned a)
593 instance Arbitrary a => Arbitrary (Versioned a) where
594 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
595
596 {-
597 -- TODO sequencs of modifications (Patchs)
598 type NgramsIdPatch = Patch NgramsId NgramsPatch
599
600 ngramsPatch :: Int -> NgramsPatch
601 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
602
603 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
604 toEdit n p = Edit n p
605 ngramsIdPatch :: Patch NgramsId NgramsPatch
606 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
607 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
608 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
609 ]
610
611 -- applyPatchBack :: Patch -> IO Patch
612 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
613 -}
614 ------------------------------------------------------------------------
615 ------------------------------------------------------------------------
616 ------------------------------------------------------------------------
617
618 {-
619 -- TODO: Replace.old is ignored which means that if the current list
620 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
621 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
622 -- However this should not happen in non conflicting situations.
623 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
624 mkListsUpdate nt patches =
625 [ (ngramsTypeId nt, ng, listTypeId lt)
626 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
627 , lt <- patch ^.. patch_list . new
628 ]
629
630 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
631 -> NgramsType
632 -> NgramsTablePatch
633 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
634 mkChildrenGroups addOrRem nt patches =
635 [ (ngramsTypeId nt, parent, child)
636 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
637 , child <- patch ^.. patch_children . to addOrRem . folded
638 ]
639 -}
640
641 ngramsTypeFromTabType :: TabType -> NgramsType
642 ngramsTypeFromTabType tabType =
643 let lieu = "Garg.API.Ngrams: " :: Text in
644 case tabType of
645 Sources -> Ngrams.Sources
646 Authors -> Ngrams.Authors
647 Institutes -> Ngrams.Institutes
648 Terms -> Ngrams.NgramsTerms
649 _ -> panic $ lieu <> "No Ngrams for this tab"
650 -- TODO: This `panic` would disapear with custom NgramsType.
651
652 ------------------------------------------------------------------------
653 data Repo s p = Repo
654 { _r_version :: Version
655 , _r_state :: s
656 , _r_history :: [p]
657 -- first patch in the list is the most recent
658 }
659 deriving (Generic)
660
661 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
662 parseJSON = genericParseJSON $ unPrefix "_r_"
663
664 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
665 toJSON = genericToJSON $ unPrefix "_r_"
666 toEncoding = genericToEncoding $ unPrefix "_r_"
667
668 makeLenses ''Repo
669
670 initRepo :: Monoid s => Repo s p
671 initRepo = Repo 1 mempty []
672
673 type NgramsRepo = Repo NgramsState NgramsStatePatch
674 type NgramsState = Map NgramsType (Map NodeId NgramsTableMap)
675 type NgramsStatePatch = PatchMap NgramsType (PatchMap NodeId NgramsTablePatch)
676
677 initMockRepo :: NgramsRepo
678 initMockRepo = Repo 1 s []
679 where
680 s = Map.singleton Ngrams.NgramsTerms
681 $ Map.singleton 47254
682 $ Map.fromList
683 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
684
685 data RepoEnv = RepoEnv
686 { _renv_var :: !(MVar NgramsRepo)
687 , _renv_saver :: !(IO ())
688 , _renv_lock :: !FileLock
689 }
690 deriving (Generic)
691
692 makeLenses ''RepoEnv
693
694 class HasRepoVar env where
695 repoVar :: Getter env (MVar NgramsRepo)
696
697 instance HasRepoVar (MVar NgramsRepo) where
698 repoVar = identity
699
700 class HasRepoSaver env where
701 repoSaver :: Getter env (IO ())
702
703 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
704 repoEnv :: Getter env RepoEnv
705
706 instance HasRepo RepoEnv where
707 repoEnv = identity
708
709 instance HasRepoVar RepoEnv where
710 repoVar = renv_var
711
712 instance HasRepoSaver RepoEnv where
713 repoSaver = renv_saver
714
715 type RepoCmdM env err m =
716 ( MonadReader env m
717 , MonadError err m
718 , MonadIO m
719 , HasRepo env
720 )
721 ------------------------------------------------------------------------
722
723 saveRepo :: ( MonadReader env m, MonadIO m, HasRepoSaver env )
724 => m ()
725 saveRepo = liftIO =<< view repoSaver
726
727 listTypeConflictResolution :: ListType -> ListType -> ListType
728 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
729
730 ngramsStatePatchConflictResolution
731 :: NgramsType -> NodeId -> NgramsTerm
732 -> ConflictResolutionNgramsPatch
733 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
734 = (const ours, ours)
735 -- undefined {- TODO think this through -}, listTypeConflictResolution)
736
737 -- Current state:
738 -- Insertions are not considered as patches,
739 -- they do not extend history,
740 -- they do not bump version.
741 insertNewOnly :: a -> Maybe b -> a
742 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
743 -- TODO error handling
744
745 something :: Monoid a => Maybe a -> a
746 something Nothing = mempty
747 something (Just a) = a
748
749 {- unused
750 -- TODO refactor with putListNgrams
751 copyListNgrams :: RepoCmdM env err m
752 => NodeId -> NodeId -> NgramsType
753 -> m ()
754 copyListNgrams srcListId dstListId ngramsType = do
755 var <- view repoVar
756 liftIO $ modifyMVar_ var $
757 pure . (r_state . at ngramsType %~ (Just . f . something))
758 saveRepo
759 where
760 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
761 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
762
763 -- TODO refactor with putListNgrams
764 -- The list must be non-empty!
765 -- The added ngrams must be non-existent!
766 addListNgrams :: RepoCmdM env err m
767 => NodeId -> NgramsType
768 -> [NgramsElement] -> m ()
769 addListNgrams listId ngramsType nes = do
770 var <- view repoVar
771 liftIO $ modifyMVar_ var $
772 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
773 saveRepo
774 where
775 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
776 -}
777
778 -- If the given list of ngrams elements contains ngrams already in
779 -- the repo, they will overwrite the old ones.
780 putListNgrams :: RepoCmdM env err m
781 => NodeId -> NgramsType
782 -> [NgramsElement] -> m ()
783 putListNgrams _ _ [] = pure ()
784 putListNgrams listId ngramsType nes = do
785 -- printDebug "putListNgrams" (length nes)
786 var <- view repoVar
787 liftIO $ modifyMVar_ var $
788 pure . (r_state . at ngramsType %~ (Just . (at listId %~ (Just . (m <>) . something)) . something))
789 saveRepo
790 where
791 m = Map.fromList $ (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) <$> nes
792
793 tableNgramsPost :: RepoCmdM env err m => TabType -> NodeId -> [NgramsElement] -> m ()
794 tableNgramsPost tabType listId = putListNgrams listId (ngramsTypeFromTabType tabType)
795
796 -- Apply the given patch to the DB and returns the patch to be applied on the
797 -- client.
798 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
799 => TabType -> ListId
800 -> Versioned NgramsTablePatch
801 -> m (Versioned NgramsTablePatch)
802 tableNgramsPut tabType listId (Versioned p_version p_table)
803 | p_table == mempty = do
804 let ngramsType = ngramsTypeFromTabType tabType
805
806 var <- view repoVar
807 r <- liftIO $ readMVar var
808
809 let
810 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
811 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
812
813 pure (Versioned (r ^. r_version) q_table)
814
815 | otherwise = do
816 let ngramsType = ngramsTypeFromTabType tabType
817 (p0, p0_validity) = PM.singleton listId p_table
818 (p, p_validity) = PM.singleton ngramsType p0
819
820 assertValid p0_validity
821 assertValid p_validity
822
823 var <- view repoVar
824 vq' <- liftIO $ modifyMVar var $ \r -> do
825 let
826 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
827 (p', q') = transformWith ngramsStatePatchConflictResolution p q
828 r' = r & r_version +~ 1
829 & r_state %~ act p'
830 & r_history %~ (p' :)
831 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
832 {-
833 -- Ideally we would like to check these properties. However:
834 -- * They should be checked only to debug the code. The client data
835 -- should be able to trigger these.
836 -- * What kind of error should they throw (we are in IO here)?
837 -- * Should we keep modifyMVar?
838 -- * Should we throw the validation in an Exception, catch it around
839 -- modifyMVar and throw it back as an Error?
840 assertValid $ transformable p q
841 assertValid $ applicable p' (r ^. r_state)
842 -}
843 pure (r', Versioned (r' ^. r_version) q'_table)
844
845 saveRepo
846 pure vq'
847
848 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
849 mergeNgramsElement _neOld neNew = neNew
850 {-
851 { _ne_list :: ListType
852 If we merge the parents/children we can potentially create cycles!
853 , _ne_parent :: Maybe NgramsTerm
854 , _ne_children :: MSet NgramsTerm
855 }
856 -}
857
858 getNgramsTableMap :: RepoCmdM env err m
859 => NodeId -> NgramsType -> m (Versioned NgramsTableMap)
860 getNgramsTableMap nodeId ngramsType = do
861 v <- view repoVar
862 repo <- liftIO $ readMVar v
863 pure $ Versioned (repo ^. r_version)
864 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
865
866 type MinSize = Int
867 type MaxSize = Int
868
869 -- | TODO Errors management
870 -- TODO: polymorphic for Annuaire or Corpus or ...
871 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
872 -- TODO: should take only one ListId
873
874
875
876
877 getTableNgrams :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
878 => NodeType -> NodeId -> TabType
879 -> ListId -> Limit -> Maybe Offset
880 -> Maybe ListType
881 -> Maybe MinSize -> Maybe MaxSize
882 -> Maybe OrderBy
883 -> (NgramsTerm -> Bool)
884 -> m (Versioned NgramsTable)
885 getTableNgrams nType nId tabType listId limit_ offset
886 listType minSize maxSize orderBy searchQuery = do
887
888 lIds <- selectNodesWithUsername NodeList userMaster
889 let
890 ngramsType = ngramsTypeFromTabType tabType
891 offset' = maybe 0 identity offset
892 listType' = maybe (const True) (==) listType
893 minSize' = maybe (const True) (<=) minSize
894 maxSize' = maybe (const True) (>=) maxSize
895
896 selected_node n = minSize' s
897 && maxSize' s
898 && searchQuery (n ^. ne_ngrams)
899 && listType' (n ^. ne_list)
900 where
901 s = n ^. ne_size
902
903 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
904
905 ---------------------------------------
906 sortOnOrder Nothing = identity
907 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
908 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
909 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
910 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
911
912 ---------------------------------------
913 selectAndPaginate tableMap (NgramsTable list) = NgramsTable $ roots <> inners
914 where
915 rootOf ne = maybe ne (\r -> ngramsElementFromRepo (r, fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. v_data . at r)))
916 (ne ^. ne_root)
917 selected_nodes = list & take limit_
918 . drop offset'
919 . filter selected_node
920 . sortOnOrder orderBy
921 roots = rootOf <$> selected_nodes
922 rootsSet = Set.fromList (_ne_ngrams <$> roots)
923 inners = list & filter (selected_inner rootsSet)
924
925 ---------------------------------------
926 setScores False table = pure table
927 setScores True table = do
928 let ngrams_terms = (table ^.. v_data . _NgramsTable . each . ne_ngrams)
929 occurrences <- getOccByNgramsOnlySlow nType nId
930 (lIds <> [listId])
931 ngramsType
932 ngrams_terms
933
934 let
935 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
936
937 pure $ table & v_data . _NgramsTable . each %~ setOcc
938 ---------------------------------------
939
940 -- lists <- catMaybes <$> listsWith userMaster
941 -- trace (show lists) $
942 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
943
944 tableMap <- getNgramsTableMap listId ngramsType
945 let nSco = needsScores orderBy
946 table <- tableMap & v_data %~ (NgramsTable . fmap ngramsElementFromRepo . Map.toList)
947 & setScores nSco
948 setScores (not nSco) $ table & v_data %~ selectAndPaginate tableMap
949
950
951 -- APIs
952
953 -- TODO: find a better place for the code above, All APIs stay here
954 type QueryParamR = QueryParam' '[Required, Strict]
955
956
957 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
958 deriving (Generic, Enum, Bounded, Read, Show)
959
960 instance FromHttpApiData OrderBy
961 where
962 parseUrlPiece "TermAsc" = pure TermAsc
963 parseUrlPiece "TermDesc" = pure TermDesc
964 parseUrlPiece "ScoreAsc" = pure ScoreAsc
965 parseUrlPiece "ScoreDesc" = pure ScoreDesc
966 parseUrlPiece _ = Left "Unexpected value of OrderBy"
967
968 instance ToParamSchema OrderBy
969 instance FromJSON OrderBy
970 instance ToJSON OrderBy
971 instance ToSchema OrderBy
972 instance Arbitrary OrderBy
973 where
974 arbitrary = elements [minBound..maxBound]
975
976 needsScores :: Maybe OrderBy -> Bool
977 needsScores (Just ScoreAsc) = True
978 needsScores (Just ScoreDesc) = True
979 needsScores _ = False
980
981 type TableNgramsApiGet = Summary " Table Ngrams API Get"
982 :> QueryParamR "ngramsType" TabType
983 :> QueryParamR "list" ListId
984 :> QueryParamR "limit" Limit
985 :> QueryParam "offset" Offset
986 :> QueryParam "listType" ListType
987 :> QueryParam "minTermSize" MinSize
988 :> QueryParam "maxTermSize" MaxSize
989 :> QueryParam "orderBy" OrderBy
990 :> QueryParam "search" Text
991 :> Get '[JSON] (Versioned NgramsTable)
992
993 type TableNgramsApiPut = Summary " Table Ngrams API Change"
994 :> QueryParamR "ngramsType" TabType
995 :> QueryParamR "list" ListId
996 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
997 :> Put '[JSON] (Versioned NgramsTablePatch)
998
999 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1000 :> QueryParamR "ngramsType" TabType
1001 :> QueryParamR "list" ListId
1002 :> ReqBody '[JSON] [NgramsElement]
1003 :> Post '[JSON] ()
1004
1005 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1006 => NodeId -> TabType
1007 -> ListId -> Limit -> Maybe Offset
1008 -> Maybe ListType
1009 -> Maybe MinSize -> Maybe MaxSize
1010 -> Maybe OrderBy
1011 -> Maybe Text -- full text search
1012 -> m (Versioned NgramsTable)
1013 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1014 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1015 where
1016 searchQuery = maybe (const True) isInfixOf mt
1017
1018 -- | Text search is deactivated for now for ngrams by doc only
1019 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1020 => DocId -> TabType
1021 -> ListId -> Limit -> Maybe Offset
1022 -> Maybe ListType
1023 -> Maybe MinSize -> Maybe MaxSize
1024 -> Maybe OrderBy
1025 -> Maybe Text -- full text search
1026 -> m (Versioned NgramsTable)
1027 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1028 ns <- selectNodesWithUsername NodeList userMaster
1029 let ngramsType = ngramsTypeFromTabType tabType
1030 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1031 let searchQuery = flip S.member (S.fromList ngs)
1032 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1033
1034
1035
1036
1037
1038
1039
1040 --{-
1041 -- TODO Doc Table Ngrams API
1042 type ApiNgramsTableDoc = TableNgramsApiGet
1043 :<|> TableNgramsApiPut
1044 :<|> TableNgramsApiPost
1045
1046 apiNgramsTableDoc :: ( RepoCmdM env err m
1047 , HasNodeError err
1048 , HasInvalidError err
1049 , HasConnection env
1050 )
1051 => DocId -> ServerT ApiNgramsTableDoc m
1052 apiNgramsTableDoc dId = getTableNgramsDoc dId
1053 :<|> tableNgramsPut
1054 :<|> tableNgramsPost
1055 -- > add new ngrams in database (TODO AD)
1056 -- > index all the corpus accordingly (TODO AD)
1057