]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[PARSERS] refactoring.
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# LANGUAGE ConstraintKinds #-}
19 {-# LANGUAGE DataKinds #-}
20 {-# LANGUAGE DeriveGeneric #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE ScopedTypeVariables #-}
24 {-# LANGUAGE TemplateHaskell #-}
25 {-# LANGUAGE TypeOperators #-}
26 {-# LANGUAGE FlexibleContexts #-}
27 {-# LANGUAGE FlexibleInstances #-}
28 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
29 {-# LANGUAGE MultiParamTypeClasses #-}
30 {-# LANGUAGE RankNTypes #-}
31 {-# LANGUAGE TypeFamilies #-}
32 {-# OPTIONS -fno-warn-orphans #-}
33
34 module Gargantext.API.Ngrams
35 where
36
37 -- import Debug.Trace (trace)
38 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
39 -- import Gargantext.Database.Schema.User (UserId)
40 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..),
41 Composable(..), Transformable(..),
42 PairPatch(..), Patched, ConflictResolution,
43 ConflictResolutionReplace, ours)
44 import qualified Data.Map.Strict.Patch as PM
45 import Data.Monoid
46 import Data.Ord (Down(..))
47 import Data.Foldable
48 --import Data.Semigroup
49 import Data.Set (Set)
50 import qualified Data.Set as S
51 import qualified Data.List as List
52 import Data.Maybe (fromMaybe)
53 -- import Data.Tuple.Extra (first)
54 import qualified Data.Map.Strict as Map
55 import Data.Map.Strict (Map)
56 import qualified Data.Set as Set
57 import Control.Category ((>>>))
58 import Control.Concurrent
59 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~))
60 import Control.Monad.Error.Class (MonadError)
61 import Control.Monad.Reader
62 import Control.Monad.State
63 import Data.Aeson hiding ((.=))
64 import Data.Aeson.TH (deriveJSON)
65 import Data.Either(Either(Left))
66 -- import Data.Map (lookup)
67 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
68 import Data.Swagger hiding (version, patch)
69 import Data.Text (Text, isInfixOf, count)
70 import Data.Validity
71 import GHC.Generics (Generic)
72 import Gargantext.Core.Utils.Prefix (unPrefix)
73 -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
74 import Gargantext.Database.Config (userMaster)
75 import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlySlow)
76 import Gargantext.Database.Schema.Ngrams (NgramsType)
77 import Gargantext.Database.Types.Node (NodeType(..))
78 import Gargantext.Database.Utils (fromField', HasConnection)
79 import Gargantext.Database.Node.Select
80 import Gargantext.Database.Ngrams
81 --import Gargantext.Database.Lists (listsWith)
82 import Gargantext.Database.Schema.Node (HasNodeError)
83 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
84 import qualified Gargantext.Database.Schema.Ngrams as Ngrams
85 -- import Gargantext.Database.Schema.NodeNgram hiding (Action)
86 import Gargantext.Prelude
87 -- import Gargantext.Core.Types (ListTypeId, listTypeId)
88 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
89 import Servant hiding (Patch)
90 import System.FileLock (FileLock)
91 import Test.QuickCheck (elements)
92 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
93
94 data TODO = TODO
95 deriving (Generic)
96
97 instance ToSchema TODO where
98
99 ------------------------------------------------------------------------
100 --data FacetFormat = Table | Chart
101 data TabType = Docs | Terms | Sources | Authors | Institutes | Trash
102 | Contacts
103 deriving (Generic, Enum, Bounded)
104
105 instance FromHttpApiData TabType
106 where
107 parseUrlPiece "Docs" = pure Docs
108 parseUrlPiece "Terms" = pure Terms
109 parseUrlPiece "Sources" = pure Sources
110 parseUrlPiece "Institutes" = pure Institutes
111 parseUrlPiece "Authors" = pure Authors
112 parseUrlPiece "Trash" = pure Trash
113
114 parseUrlPiece "Contacts" = pure Contacts
115
116 parseUrlPiece _ = Left "Unexpected value of TabType"
117
118 instance ToParamSchema TabType
119 instance ToJSON TabType
120 instance FromJSON TabType
121 instance ToSchema TabType
122 instance Arbitrary TabType
123 where
124 arbitrary = elements [minBound .. maxBound]
125
126 newtype MSet a = MSet (Map a ())
127 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
128
129 instance ToJSON a => ToJSON (MSet a) where
130 toJSON (MSet m) = toJSON (Map.keys m)
131 toEncoding (MSet m) = toEncoding (Map.keys m)
132
133 mSetFromSet :: Set a -> MSet a
134 mSetFromSet = MSet . Map.fromSet (const ())
135
136 mSetFromList :: Ord a => [a] -> MSet a
137 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
138
139 -- mSetToSet :: Ord a => MSet a -> Set a
140 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
141 mSetToSet :: Ord a => MSet a -> Set a
142 mSetToSet = Set.fromList . mSetToList
143
144 mSetToList :: MSet a -> [a]
145 mSetToList (MSet a) = Map.keys a
146
147 instance Foldable MSet where
148 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
149
150 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
151 parseJSON = fmap mSetFromList . parseJSON
152
153 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
154 -- TODO
155 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
156
157 ------------------------------------------------------------------------
158 type NgramsTerm = Text
159
160 data RootParent = RootParent
161 { _rp_root :: NgramsTerm
162 , _rp_parent :: NgramsTerm
163 }
164 deriving (Ord, Eq, Show, Generic)
165
166 deriveJSON (unPrefix "_rp_") ''RootParent
167 makeLenses ''RootParent
168
169 data NgramsRepoElement = NgramsRepoElement
170 { _nre_size :: Int
171 , _nre_list :: ListType
172 --, _nre_root_parent :: Maybe RootParent
173 , _nre_root :: Maybe NgramsTerm
174 , _nre_parent :: Maybe NgramsTerm
175 , _nre_children :: MSet NgramsTerm
176 }
177 deriving (Ord, Eq, Show, Generic)
178
179 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
180 makeLenses ''NgramsRepoElement
181
182 data NgramsElement =
183 NgramsElement { _ne_ngrams :: NgramsTerm
184 , _ne_size :: Int
185 , _ne_list :: ListType
186 , _ne_occurrences :: Int
187 , _ne_root :: Maybe NgramsTerm
188 , _ne_parent :: Maybe NgramsTerm
189 , _ne_children :: MSet NgramsTerm
190 }
191 deriving (Ord, Eq, Show, Generic)
192
193 deriveJSON (unPrefix "_ne_") ''NgramsElement
194 makeLenses ''NgramsElement
195
196 mkNgramsElement :: NgramsTerm -> ListType -> Maybe RootParent -> MSet NgramsTerm -> NgramsElement
197 mkNgramsElement ngrams list rp children =
198 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
199 where
200 -- TODO review
201 size = 1 + count " " ngrams
202
203 newNgramsElement :: NgramsTerm -> NgramsElement
204 newNgramsElement ngrams = mkNgramsElement ngrams GraphTerm Nothing mempty
205
206 instance ToSchema NgramsElement
207 instance Arbitrary NgramsElement where
208 arbitrary = elements [newNgramsElement "sport"]
209
210 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
211 ngramsElementToRepo
212 (NgramsElement { _ne_size = s
213 , _ne_list = l
214 , _ne_root = r
215 , _ne_parent = p
216 , _ne_children = c
217 }) =
218 NgramsRepoElement
219 { _nre_size = s
220 , _nre_list = l
221 , _nre_parent = p
222 , _nre_root = r
223 , _nre_children = c
224 }
225
226 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
227 ngramsElementFromRepo
228 ngrams
229 (NgramsRepoElement
230 { _nre_size = s
231 , _nre_list = l
232 , _nre_parent = p
233 , _nre_root = r
234 , _nre_children = c
235 }) =
236 NgramsElement { _ne_size = s
237 , _ne_list = l
238 , _ne_root = r
239 , _ne_parent = p
240 , _ne_children = c
241 , _ne_ngrams = ngrams
242 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
243 {-
244 -- Here we could use 0 if we want to avoid any `panic`.
245 -- It will not happen using getTableNgrams if
246 -- getOccByNgramsOnly provides a count of occurrences for
247 -- all the ngrams given.
248 -}
249 }
250
251 ------------------------------------------------------------------------
252 newtype NgramsTable = NgramsTable [NgramsElement]
253 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
254
255 type ListNgrams = NgramsTable
256
257 makePrisms ''NgramsTable
258
259 -- | Question: why these repetition of Type in this instance
260 -- may you document it please ?
261 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
262 each = _NgramsTable . each
263
264 -- TODO discuss
265 -- | TODO Check N and Weight
266 {-
267 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
268 toNgramsElement ns = map toNgramsElement' ns
269 where
270 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
271 where
272 p' = case p of
273 Nothing -> Nothing
274 Just x -> lookup x mapParent
275 c' = maybe mempty identity $ lookup t mapChildren
276 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
277
278 mapParent :: Map Int Text
279 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
280
281 mapChildren :: Map Text (Set Text)
282 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
283 $ Map.fromListWith (<>)
284 $ map (first fromJust)
285 $ filter (isJust . fst)
286 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
287 -}
288
289 mockTable :: NgramsTable
290 mockTable = NgramsTable
291 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
292 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
293 , mkNgramsElement "cats" StopTerm Nothing mempty
294 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
295 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
296 , mkNgramsElement "fox" GraphTerm Nothing mempty
297 , mkNgramsElement "object" CandidateTerm Nothing mempty
298 , mkNgramsElement "nothing" StopTerm Nothing mempty
299 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
300 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
301 , mkNgramsElement "moon" CandidateTerm Nothing mempty
302 , mkNgramsElement "sky" StopTerm Nothing mempty
303 ]
304 where
305 rp n = Just $ RootParent n n
306
307 instance Arbitrary NgramsTable where
308 arbitrary = pure mockTable
309
310 instance ToSchema NgramsTable
311
312 ------------------------------------------------------------------------
313 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
314
315 ------------------------------------------------------------------------
316 -- On the Client side:
317 --data Action = InGroup NgramsId NgramsId
318 -- | OutGroup NgramsId NgramsId
319 -- | SetListType NgramsId ListType
320
321 data PatchSet a = PatchSet
322 { _rem :: Set a
323 , _add :: Set a
324 }
325 deriving (Eq, Ord, Show, Generic)
326
327 makeLenses ''PatchSet
328 makePrisms ''PatchSet
329
330 instance ToJSON a => ToJSON (PatchSet a) where
331 toJSON = genericToJSON $ unPrefix "_"
332 toEncoding = genericToEncoding $ unPrefix "_"
333
334 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
335 parseJSON = genericParseJSON $ unPrefix "_"
336
337 {-
338 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
339 arbitrary = PatchSet <$> arbitrary <*> arbitrary
340
341 type instance Patched (PatchSet a) = Set a
342
343 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
344 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
345
346 instance Ord a => Semigroup (PatchSet a) where
347 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
348 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
349 } -- TODO Review
350
351 instance Ord a => Monoid (PatchSet a) where
352 mempty = PatchSet mempty mempty
353
354 instance Ord a => Group (PatchSet a) where
355 invert (PatchSet r a) = PatchSet a r
356
357 instance Ord a => Composable (PatchSet a) where
358 composable _ _ = undefined
359
360 instance Ord a => Action (PatchSet a) (Set a) where
361 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
362
363 instance Applicable (PatchSet a) (Set a) where
364 applicable _ _ = mempty
365
366 instance Ord a => Validity (PatchSet a) where
367 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
368
369 instance Ord a => Transformable (PatchSet a) where
370 transformable = undefined
371
372 conflicts _p _q = undefined
373
374 transformWith conflict p q = undefined conflict p q
375
376 instance ToSchema a => ToSchema (PatchSet a)
377 -}
378
379 type AddRem = Replace (Maybe ())
380
381 remPatch, addPatch :: AddRem
382 remPatch = replace (Just ()) Nothing
383 addPatch = replace Nothing (Just ())
384
385 isRem :: Replace (Maybe ()) -> Bool
386 isRem = (== remPatch)
387
388 type PatchMap = PM.PatchMap
389
390 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
391 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
392 Transformable, Composable)
393
394 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
395 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
396
397 -- TODO this breaks module abstraction
398 makePrisms ''PM.PatchMap
399
400 makePrisms ''PatchMSet
401
402 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
403 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
404 where
405 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
406 f = Map.partition isRem >>> both %~ Map.keysSet
407
408 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
409 g (rems, adds) = Map.fromSet (const remPatch) rems
410 <> Map.fromSet (const addPatch) adds
411
412 instance Ord a => Action (PatchMSet a) (MSet a) where
413 act (PatchMSet p) (MSet m) = MSet $ act p m
414
415 instance Ord a => Applicable (PatchMSet a) (MSet a) where
416 applicable (PatchMSet p) (MSet m) = applicable p m
417
418 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
419 toJSON = toJSON . view _PatchMSetIso
420 toEncoding = toEncoding . view _PatchMSetIso
421
422 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
423 parseJSON = fmap (_PatchMSetIso #) . parseJSON
424
425 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
426 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
427
428 instance ToSchema a => ToSchema (PatchMSet a) where
429 -- TODO
430 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
431
432 type instance Patched (PatchMSet a) = MSet a
433
434 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
435 arbitrary = uncurry replace <$> arbitrary
436 -- If they happen to be equal then the patch is Keep.
437
438 instance ToSchema a => ToSchema (Replace a) where
439 declareNamedSchema (_ :: proxy (Replace a)) = do
440 -- TODO Keep constructor is not supported here.
441 aSchema <- declareSchemaRef (Proxy :: Proxy a)
442 return $ NamedSchema (Just "Replace") $ mempty
443 & type_ .~ SwaggerObject
444 & properties .~
445 InsOrdHashMap.fromList
446 [ ("old", aSchema)
447 , ("new", aSchema)
448 ]
449 & required .~ [ "old", "new" ]
450
451 data NgramsPatch =
452 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
453 , _patch_list :: Replace ListType -- TODO Map UserId ListType
454 }
455 deriving (Eq, Show, Generic)
456
457 deriveJSON (unPrefix "_") ''NgramsPatch
458 makeLenses ''NgramsPatch
459
460 instance ToSchema NgramsPatch
461
462 instance Arbitrary NgramsPatch where
463 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
464
465 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
466
467 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
468 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
469
470 instance Semigroup NgramsPatch where
471 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
472
473 instance Monoid NgramsPatch where
474 mempty = _NgramsPatch # mempty
475
476 instance Validity NgramsPatch where
477 validate p = p ^. _NgramsPatch . to validate
478
479 instance Transformable NgramsPatch where
480 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
481
482 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
483
484 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
485 where
486 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
487
488 type ConflictResolutionNgramsPatch =
489 ( ConflictResolutionPatchMSet NgramsTerm
490 , ConflictResolutionReplace ListType
491 )
492 type instance ConflictResolution NgramsPatch =
493 ConflictResolutionNgramsPatch
494
495 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
496 -- ~ Patched NgramsPatchIso
497 type instance Patched NgramsPatch = PatchedNgramsPatch
498
499 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
500 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
501 applicable p (Just nre) =
502 applicable (p ^. patch_children) (nre ^. nre_children) <>
503 applicable (p ^. patch_list) (nre ^. nre_list)
504
505 instance Action NgramsPatch NgramsRepoElement where
506 act p = (nre_children %~ act (p ^. patch_children))
507 . (nre_list %~ act (p ^. patch_list))
508
509 instance Action NgramsPatch (Maybe NgramsRepoElement) where
510 act = fmap . act
511
512 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
513 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
514
515 instance FromField NgramsTablePatch
516 where
517 fromField = fromField'
518
519 instance FromField (PatchMap NgramsType (PatchMap NodeId NgramsTablePatch))
520 where
521 fromField = fromField'
522
523 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
524 --
525 type instance ConflictResolution NgramsTablePatch =
526 NgramsTerm -> ConflictResolutionNgramsPatch
527
528 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
529 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
530 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
531
532 makePrisms ''NgramsTablePatch
533 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
534 instance ToSchema NgramsTablePatch
535
536 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
537 applicable p = applicable (p ^. _NgramsTablePatch)
538
539 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
540 act p =
541 fmap (execState (reParentNgramsTablePatch p)) .
542 act (p ^. _NgramsTablePatch)
543
544 instance Arbitrary NgramsTablePatch where
545 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
546
547 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
548 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
549 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
550
551 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
552
553 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
554 reRootChildren root ngram = do
555 nre <- use $ at ngram
556 forOf_ (_Just . nre_children . folded) nre $ \child -> do
557 at child . _Just . nre_root ?= root
558 reRootChildren root child
559
560 reParent :: Maybe RootParent -> ReParent NgramsTerm
561 reParent rp child = do
562 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
563 . (nre_root .~ (_rp_root <$> rp))
564 )
565 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
566
567 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
568 reParentAddRem rp child p =
569 reParent (if isRem p then Nothing else Just rp) child
570
571 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
572 reParentNgramsPatch parent ngramsPatch = do
573 root_of_parent <- use (at parent . _Just . nre_root)
574 let
575 root = fromMaybe parent root_of_parent
576 rp = RootParent { _rp_root = root, _rp_parent = parent }
577 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
578 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
579
580 reParentNgramsTablePatch :: ReParent NgramsTablePatch
581 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
582 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
583
584 ------------------------------------------------------------------------
585 ------------------------------------------------------------------------
586 type Version = Int
587
588 data Versioned a = Versioned
589 { _v_version :: Version
590 , _v_data :: a
591 }
592 deriving (Generic, Show)
593 deriveJSON (unPrefix "_v_") ''Versioned
594 makeLenses ''Versioned
595 instance ToSchema a => ToSchema (Versioned a)
596 instance Arbitrary a => Arbitrary (Versioned a) where
597 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
598
599 {-
600 -- TODO sequencs of modifications (Patchs)
601 type NgramsIdPatch = Patch NgramsId NgramsPatch
602
603 ngramsPatch :: Int -> NgramsPatch
604 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
605
606 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
607 toEdit n p = Edit n p
608 ngramsIdPatch :: Patch NgramsId NgramsPatch
609 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
610 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
611 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
612 ]
613
614 -- applyPatchBack :: Patch -> IO Patch
615 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
616 -}
617 ------------------------------------------------------------------------
618 ------------------------------------------------------------------------
619 ------------------------------------------------------------------------
620
621 {-
622 -- TODO: Replace.old is ignored which means that if the current list
623 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
624 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
625 -- However this should not happen in non conflicting situations.
626 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
627 mkListsUpdate nt patches =
628 [ (ngramsTypeId nt, ng, listTypeId lt)
629 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
630 , lt <- patch ^.. patch_list . new
631 ]
632
633 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
634 -> NgramsType
635 -> NgramsTablePatch
636 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
637 mkChildrenGroups addOrRem nt patches =
638 [ (ngramsTypeId nt, parent, child)
639 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
640 , child <- patch ^.. patch_children . to addOrRem . folded
641 ]
642 -}
643
644 ngramsTypeFromTabType :: TabType -> NgramsType
645 ngramsTypeFromTabType tabType =
646 let lieu = "Garg.API.Ngrams: " :: Text in
647 case tabType of
648 Sources -> Ngrams.Sources
649 Authors -> Ngrams.Authors
650 Institutes -> Ngrams.Institutes
651 Terms -> Ngrams.NgramsTerms
652 _ -> panic $ lieu <> "No Ngrams for this tab"
653 -- TODO: This `panic` would disapear with custom NgramsType.
654
655 ------------------------------------------------------------------------
656 data Repo s p = Repo
657 { _r_version :: Version
658 , _r_state :: s
659 , _r_history :: [p]
660 -- first patch in the list is the most recent
661 }
662 deriving (Generic)
663
664 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
665 parseJSON = genericParseJSON $ unPrefix "_r_"
666
667 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
668 toJSON = genericToJSON $ unPrefix "_r_"
669 toEncoding = genericToEncoding $ unPrefix "_r_"
670
671 makeLenses ''Repo
672
673 initRepo :: Monoid s => Repo s p
674 initRepo = Repo 1 mempty []
675
676 type NgramsRepo = Repo NgramsState NgramsStatePatch
677 type NgramsState = Map NgramsType (Map NodeId NgramsTableMap)
678 type NgramsStatePatch = PatchMap NgramsType (PatchMap NodeId NgramsTablePatch)
679
680 initMockRepo :: NgramsRepo
681 initMockRepo = Repo 1 s []
682 where
683 s = Map.singleton Ngrams.NgramsTerms
684 $ Map.singleton 47254
685 $ Map.fromList
686 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
687
688 data RepoEnv = RepoEnv
689 { _renv_var :: !(MVar NgramsRepo)
690 , _renv_saver :: !(IO ())
691 , _renv_lock :: !FileLock
692 }
693 deriving (Generic)
694
695 makeLenses ''RepoEnv
696
697 class HasRepoVar env where
698 repoVar :: Getter env (MVar NgramsRepo)
699
700 instance HasRepoVar (MVar NgramsRepo) where
701 repoVar = identity
702
703 class HasRepoSaver env where
704 repoSaver :: Getter env (IO ())
705
706 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
707 repoEnv :: Getter env RepoEnv
708
709 instance HasRepo RepoEnv where
710 repoEnv = identity
711
712 instance HasRepoVar RepoEnv where
713 repoVar = renv_var
714
715 instance HasRepoSaver RepoEnv where
716 repoSaver = renv_saver
717
718 type RepoCmdM env err m =
719 ( MonadReader env m
720 , MonadError err m
721 , MonadIO m
722 , HasRepo env
723 )
724 ------------------------------------------------------------------------
725
726 saveRepo :: ( MonadReader env m, MonadIO m, HasRepoSaver env )
727 => m ()
728 saveRepo = liftIO =<< view repoSaver
729
730 listTypeConflictResolution :: ListType -> ListType -> ListType
731 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
732
733 ngramsStatePatchConflictResolution
734 :: NgramsType -> NodeId -> NgramsTerm
735 -> ConflictResolutionNgramsPatch
736 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
737 = (const ours, ours)
738 -- undefined {- TODO think this through -}, listTypeConflictResolution)
739
740 -- Current state:
741 -- Insertions are not considered as patches,
742 -- they do not extend history,
743 -- they do not bump version.
744 insertNewOnly :: a -> Maybe b -> a
745 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
746 -- TODO error handling
747
748 something :: Monoid a => Maybe a -> a
749 something Nothing = mempty
750 something (Just a) = a
751
752 {- unused
753 -- TODO refactor with putListNgrams
754 copyListNgrams :: RepoCmdM env err m
755 => NodeId -> NodeId -> NgramsType
756 -> m ()
757 copyListNgrams srcListId dstListId ngramsType = do
758 var <- view repoVar
759 liftIO $ modifyMVar_ var $
760 pure . (r_state . at ngramsType %~ (Just . f . something))
761 saveRepo
762 where
763 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
764 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
765
766 -- TODO refactor with putListNgrams
767 -- The list must be non-empty!
768 -- The added ngrams must be non-existent!
769 addListNgrams :: RepoCmdM env err m
770 => NodeId -> NgramsType
771 -> [NgramsElement] -> m ()
772 addListNgrams listId ngramsType nes = do
773 var <- view repoVar
774 liftIO $ modifyMVar_ var $
775 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
776 saveRepo
777 where
778 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
779 -}
780
781 -- If the given list of ngrams elements contains ngrams already in
782 -- the repo, they will be ignored.
783 putListNgrams :: RepoCmdM env err m
784 => NodeId -> NgramsType
785 -> [NgramsElement] -> m ()
786 putListNgrams _ _ [] = pure ()
787 putListNgrams listId ngramsType nes = do
788 -- printDebug "putListNgrams" (length nes)
789 var <- view repoVar
790 liftIO $ modifyMVar_ var $
791 pure . (r_state . at ngramsType %~ (Just . (at listId %~ (Just . (<> m) . something)) . something))
792 saveRepo
793 where
794 m = Map.fromList $ (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) <$> nes
795
796 tableNgramsPost :: RepoCmdM env err m => TabType -> NodeId -> [NgramsTerm] -> m ()
797 tableNgramsPost tabType listId =
798 putListNgrams listId (ngramsTypeFromTabType tabType) . fmap newNgramsElement
799
800 -- Apply the given patch to the DB and returns the patch to be applied on the
801 -- client.
802 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
803 => TabType -> ListId
804 -> Versioned NgramsTablePatch
805 -> m (Versioned NgramsTablePatch)
806 tableNgramsPut tabType listId (Versioned p_version p_table)
807 | p_table == mempty = do
808 let ngramsType = ngramsTypeFromTabType tabType
809
810 var <- view repoVar
811 r <- liftIO $ readMVar var
812
813 let
814 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
815 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
816
817 pure (Versioned (r ^. r_version) q_table)
818
819 | otherwise = do
820 let ngramsType = ngramsTypeFromTabType tabType
821 (p0, p0_validity) = PM.singleton listId p_table
822 (p, p_validity) = PM.singleton ngramsType p0
823
824 assertValid p0_validity
825 assertValid p_validity
826
827 var <- view repoVar
828 vq' <- liftIO $ modifyMVar var $ \r -> do
829 let
830 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
831 (p', q') = transformWith ngramsStatePatchConflictResolution p q
832 r' = r & r_version +~ 1
833 & r_state %~ act p'
834 & r_history %~ (p' :)
835 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
836 {-
837 -- Ideally we would like to check these properties. However:
838 -- * They should be checked only to debug the code. The client data
839 -- should be able to trigger these.
840 -- * What kind of error should they throw (we are in IO here)?
841 -- * Should we keep modifyMVar?
842 -- * Should we throw the validation in an Exception, catch it around
843 -- modifyMVar and throw it back as an Error?
844 assertValid $ transformable p q
845 assertValid $ applicable p' (r ^. r_state)
846 -}
847 pure (r', Versioned (r' ^. r_version) q'_table)
848
849 saveRepo
850 pure vq'
851
852 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
853 mergeNgramsElement _neOld neNew = neNew
854 {-
855 { _ne_list :: ListType
856 If we merge the parents/children we can potentially create cycles!
857 , _ne_parent :: Maybe NgramsTerm
858 , _ne_children :: MSet NgramsTerm
859 }
860 -}
861
862 getNgramsTableMap :: RepoCmdM env err m
863 => NodeId -> NgramsType -> m (Versioned NgramsTableMap)
864 getNgramsTableMap nodeId ngramsType = do
865 v <- view repoVar
866 repo <- liftIO $ readMVar v
867 pure $ Versioned (repo ^. r_version)
868 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
869
870 type MinSize = Int
871 type MaxSize = Int
872
873 -- | TODO Errors management
874 -- TODO: polymorphic for Annuaire or Corpus or ...
875 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
876 -- TODO: should take only one ListId
877
878
879
880
881 getTableNgrams :: forall env err m.
882 (RepoCmdM env err m, HasNodeError err, HasConnection env)
883 => NodeType -> NodeId -> TabType
884 -> ListId -> Limit -> Maybe Offset
885 -> Maybe ListType
886 -> Maybe MinSize -> Maybe MaxSize
887 -> Maybe OrderBy
888 -> (NgramsTerm -> Bool)
889 -> m (Versioned NgramsTable)
890 getTableNgrams nType nId tabType listId limit_ offset
891 listType minSize maxSize orderBy searchQuery = do
892
893 lIds <- selectNodesWithUsername NodeList userMaster
894 let
895 ngramsType = ngramsTypeFromTabType tabType
896 offset' = maybe 0 identity offset
897 listType' = maybe (const True) (==) listType
898 minSize' = maybe (const True) (<=) minSize
899 maxSize' = maybe (const True) (>=) maxSize
900
901 selected_node n = minSize' s
902 && maxSize' s
903 && searchQuery (n ^. ne_ngrams)
904 && listType' (n ^. ne_list)
905 where
906 s = n ^. ne_size
907
908 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
909
910 ---------------------------------------
911 sortOnOrder Nothing = identity
912 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
913 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
914 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
915 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
916
917 ---------------------------------------
918 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
919 selectAndPaginate tableMap = roots <> inners
920 where
921 list = tableMap ^.. each
922 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
923 (ne ^. ne_root)
924 selected_nodes = list & take limit_
925 . drop offset'
926 . filter selected_node
927 . sortOnOrder orderBy
928 roots = rootOf <$> selected_nodes
929 rootsSet = Set.fromList (_ne_ngrams <$> roots)
930 inners = list & filter (selected_inner rootsSet)
931
932 ---------------------------------------
933 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
934 setScores False table = pure table
935 setScores True table = do
936 let ngrams_terms = (table ^.. each . ne_ngrams)
937 occurrences <- getOccByNgramsOnlySlow nType nId
938 (lIds <> [listId])
939 ngramsType
940 ngrams_terms
941
942 let
943 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
944
945 pure $ table & each %~ setOcc
946 ---------------------------------------
947
948 -- lists <- catMaybes <$> listsWith userMaster
949 -- trace (show lists) $
950 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
951
952 let nSco = needsScores orderBy
953 tableMap1 <- getNgramsTableMap listId ngramsType
954 tableMap2 <- tableMap1 & v_data %%~ setScores nSco
955 . Map.mapWithKey ngramsElementFromRepo
956 tableMap2 & v_data %%~ fmap NgramsTable
957 . setScores (not nSco)
958 . selectAndPaginate
959
960 -- APIs
961
962 -- TODO: find a better place for the code above, All APIs stay here
963 type QueryParamR = QueryParam' '[Required, Strict]
964
965
966 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
967 deriving (Generic, Enum, Bounded, Read, Show)
968
969 instance FromHttpApiData OrderBy
970 where
971 parseUrlPiece "TermAsc" = pure TermAsc
972 parseUrlPiece "TermDesc" = pure TermDesc
973 parseUrlPiece "ScoreAsc" = pure ScoreAsc
974 parseUrlPiece "ScoreDesc" = pure ScoreDesc
975 parseUrlPiece _ = Left "Unexpected value of OrderBy"
976
977 instance ToParamSchema OrderBy
978 instance FromJSON OrderBy
979 instance ToJSON OrderBy
980 instance ToSchema OrderBy
981 instance Arbitrary OrderBy
982 where
983 arbitrary = elements [minBound..maxBound]
984
985 needsScores :: Maybe OrderBy -> Bool
986 needsScores (Just ScoreAsc) = True
987 needsScores (Just ScoreDesc) = True
988 needsScores _ = False
989
990 type TableNgramsApiGet = Summary " Table Ngrams API Get"
991 :> QueryParamR "ngramsType" TabType
992 :> QueryParamR "list" ListId
993 :> QueryParamR "limit" Limit
994 :> QueryParam "offset" Offset
995 :> QueryParam "listType" ListType
996 :> QueryParam "minTermSize" MinSize
997 :> QueryParam "maxTermSize" MaxSize
998 :> QueryParam "orderBy" OrderBy
999 :> QueryParam "search" Text
1000 :> Get '[JSON] (Versioned NgramsTable)
1001
1002 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1003 :> QueryParamR "ngramsType" TabType
1004 :> QueryParamR "list" ListId
1005 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1006 :> Put '[JSON] (Versioned NgramsTablePatch)
1007
1008 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1009 :> QueryParamR "ngramsType" TabType
1010 :> QueryParamR "list" ListId
1011 :> ReqBody '[JSON] [NgramsTerm]
1012 :> Post '[JSON] ()
1013
1014 type TableNgramsApi = TableNgramsApiGet
1015 :<|> TableNgramsApiPut
1016 :<|> TableNgramsApiPost
1017
1018 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1019 => NodeId -> TabType
1020 -> ListId -> Limit -> Maybe Offset
1021 -> Maybe ListType
1022 -> Maybe MinSize -> Maybe MaxSize
1023 -> Maybe OrderBy
1024 -> Maybe Text -- full text search
1025 -> m (Versioned NgramsTable)
1026 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1027 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1028 where
1029 searchQuery = maybe (const True) isInfixOf mt
1030
1031 -- | Text search is deactivated for now for ngrams by doc only
1032 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1033 => DocId -> TabType
1034 -> ListId -> Limit -> Maybe Offset
1035 -> Maybe ListType
1036 -> Maybe MinSize -> Maybe MaxSize
1037 -> Maybe OrderBy
1038 -> Maybe Text -- full text search
1039 -> m (Versioned NgramsTable)
1040 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1041 ns <- selectNodesWithUsername NodeList userMaster
1042 let ngramsType = ngramsTypeFromTabType tabType
1043 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1044 let searchQuery = flip S.member (S.fromList ngs)
1045 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1046
1047
1048
1049
1050
1051 apiNgramsTableCorpus :: ( RepoCmdM env err m
1052 , HasNodeError err
1053 , HasInvalidError err
1054 , HasConnection env
1055 )
1056 => NodeId -> ServerT TableNgramsApi m
1057 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1058 :<|> tableNgramsPut
1059 :<|> tableNgramsPost
1060
1061
1062 apiNgramsTableDoc :: ( RepoCmdM env err m
1063 , HasNodeError err
1064 , HasInvalidError err
1065 , HasConnection env
1066 )
1067 => DocId -> ServerT TableNgramsApi m
1068 apiNgramsTableDoc dId = getTableNgramsDoc dId
1069 :<|> tableNgramsPut
1070 :<|> tableNgramsPost
1071 -- > add new ngrams in database (TODO AD)
1072 -- > index all the corpus accordingly (TODO AD)
1073