]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
Revert "[FIX] bwd_entropy : bwd tokens."
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# LANGUAGE ConstraintKinds #-}
19 {-# LANGUAGE DataKinds #-}
20 {-# LANGUAGE DeriveGeneric #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE ScopedTypeVariables #-}
24 {-# LANGUAGE TemplateHaskell #-}
25 {-# LANGUAGE TypeOperators #-}
26 {-# LANGUAGE FlexibleContexts #-}
27 {-# LANGUAGE FlexibleInstances #-}
28 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
29 {-# LANGUAGE MultiParamTypeClasses #-}
30 {-# LANGUAGE RankNTypes #-}
31 {-# LANGUAGE TypeFamilies #-}
32 {-# OPTIONS -fno-warn-orphans #-}
33
34 module Gargantext.API.Ngrams
35 where
36
37 -- import Debug.Trace (trace)
38 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
39 -- import Gargantext.Database.Schema.User (UserId)
40 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..),
41 Composable(..), Transformable(..),
42 PairPatch(..), Patched, ConflictResolution,
43 ConflictResolutionReplace, ours)
44 import qualified Data.Map.Strict.Patch as PM
45 import Data.Monoid
46 import Data.Ord (Down(..))
47 import Data.Foldable
48 --import Data.Semigroup
49 import Data.Set (Set)
50 import qualified Data.Set as S
51 import qualified Data.List as List
52 import Data.Maybe (fromMaybe)
53 -- import Data.Tuple.Extra (first)
54 import qualified Data.Map.Strict as Map
55 import Data.Map.Strict (Map)
56 import qualified Data.Set as Set
57 import Control.Category ((>>>))
58 import Control.Concurrent
59 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~))
60 import Control.Monad.Error.Class (MonadError)
61 import Control.Monad.Reader
62 import Control.Monad.State
63 import Data.Aeson hiding ((.=))
64 import Data.Aeson.TH (deriveJSON)
65 import Data.Either(Either(Left))
66 -- import Data.Map (lookup)
67 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
68 import Data.Swagger hiding (version, patch)
69 import Data.Text (Text, isInfixOf, count)
70 import Data.Validity
71 import GHC.Generics (Generic)
72 import Gargantext.Core.Utils.Prefix (unPrefix)
73 -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
74 import Gargantext.Database.Config (userMaster)
75 import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlySlow)
76 import Gargantext.Database.Schema.Ngrams (NgramsType)
77 import Gargantext.Database.Types.Node (NodeType(..))
78 import Gargantext.Database.Utils (fromField', HasConnection)
79 import Gargantext.Database.Node.Select
80 import Gargantext.Database.Ngrams
81 --import Gargantext.Database.Lists (listsWith)
82 import Gargantext.Database.Schema.Node (HasNodeError)
83 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
84 import qualified Gargantext.Database.Schema.Ngrams as Ngrams
85 -- import Gargantext.Database.Schema.NodeNgram hiding (Action)
86 import Gargantext.Prelude
87 -- import Gargantext.Core.Types (ListTypeId, listTypeId)
88 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
89 import Servant hiding (Patch)
90 import System.FileLock (FileLock)
91 import Test.QuickCheck (elements)
92 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
93
94 data TODO = TODO
95 deriving (Generic)
96
97 instance ToSchema TODO where
98 instance ToParamSchema TODO where
99
100 ------------------------------------------------------------------------
101 --data FacetFormat = Table | Chart
102 data TabType = Docs | Terms | Sources | Authors | Institutes | Trash
103 | Contacts
104 deriving (Generic, Enum, Bounded)
105
106 instance FromHttpApiData TabType
107 where
108 parseUrlPiece "Docs" = pure Docs
109 parseUrlPiece "Terms" = pure Terms
110 parseUrlPiece "Sources" = pure Sources
111 parseUrlPiece "Institutes" = pure Institutes
112 parseUrlPiece "Authors" = pure Authors
113 parseUrlPiece "Trash" = pure Trash
114
115 parseUrlPiece "Contacts" = pure Contacts
116
117 parseUrlPiece _ = Left "Unexpected value of TabType"
118
119 instance ToParamSchema TabType
120 instance ToJSON TabType
121 instance FromJSON TabType
122 instance ToSchema TabType
123 instance Arbitrary TabType
124 where
125 arbitrary = elements [minBound .. maxBound]
126
127 newtype MSet a = MSet (Map a ())
128 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
129
130 instance ToJSON a => ToJSON (MSet a) where
131 toJSON (MSet m) = toJSON (Map.keys m)
132 toEncoding (MSet m) = toEncoding (Map.keys m)
133
134 mSetFromSet :: Set a -> MSet a
135 mSetFromSet = MSet . Map.fromSet (const ())
136
137 mSetFromList :: Ord a => [a] -> MSet a
138 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
139
140 -- mSetToSet :: Ord a => MSet a -> Set a
141 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
142 mSetToSet :: Ord a => MSet a -> Set a
143 mSetToSet = Set.fromList . mSetToList
144
145 mSetToList :: MSet a -> [a]
146 mSetToList (MSet a) = Map.keys a
147
148 instance Foldable MSet where
149 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
150
151 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
152 parseJSON = fmap mSetFromList . parseJSON
153
154 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
155 -- TODO
156 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
157
158 ------------------------------------------------------------------------
159 type NgramsTerm = Text
160
161 data RootParent = RootParent
162 { _rp_root :: NgramsTerm
163 , _rp_parent :: NgramsTerm
164 }
165 deriving (Ord, Eq, Show, Generic)
166
167 deriveJSON (unPrefix "_rp_") ''RootParent
168 makeLenses ''RootParent
169
170 data NgramsRepoElement = NgramsRepoElement
171 { _nre_size :: Int
172 , _nre_list :: ListType
173 --, _nre_root_parent :: Maybe RootParent
174 , _nre_root :: Maybe NgramsTerm
175 , _nre_parent :: Maybe NgramsTerm
176 , _nre_children :: MSet NgramsTerm
177 }
178 deriving (Ord, Eq, Show, Generic)
179
180 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
181 makeLenses ''NgramsRepoElement
182
183 data NgramsElement =
184 NgramsElement { _ne_ngrams :: NgramsTerm
185 , _ne_size :: Int
186 , _ne_list :: ListType
187 , _ne_occurrences :: Int
188 , _ne_root :: Maybe NgramsTerm
189 , _ne_parent :: Maybe NgramsTerm
190 , _ne_children :: MSet NgramsTerm
191 }
192 deriving (Ord, Eq, Show, Generic)
193
194 deriveJSON (unPrefix "_ne_") ''NgramsElement
195 makeLenses ''NgramsElement
196
197 mkNgramsElement :: NgramsTerm -> ListType -> Maybe RootParent -> MSet NgramsTerm -> NgramsElement
198 mkNgramsElement ngrams list rp children =
199 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
200 where
201 -- TODO review
202 size = 1 + count " " ngrams
203
204 newNgramsElement :: NgramsTerm -> NgramsElement
205 newNgramsElement ngrams = mkNgramsElement ngrams GraphTerm Nothing mempty
206
207 instance ToSchema NgramsElement
208 instance Arbitrary NgramsElement where
209 arbitrary = elements [newNgramsElement "sport"]
210
211 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
212 ngramsElementToRepo
213 (NgramsElement { _ne_size = s
214 , _ne_list = l
215 , _ne_root = r
216 , _ne_parent = p
217 , _ne_children = c
218 }) =
219 NgramsRepoElement
220 { _nre_size = s
221 , _nre_list = l
222 , _nre_parent = p
223 , _nre_root = r
224 , _nre_children = c
225 }
226
227 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
228 ngramsElementFromRepo
229 ngrams
230 (NgramsRepoElement
231 { _nre_size = s
232 , _nre_list = l
233 , _nre_parent = p
234 , _nre_root = r
235 , _nre_children = c
236 }) =
237 NgramsElement { _ne_size = s
238 , _ne_list = l
239 , _ne_root = r
240 , _ne_parent = p
241 , _ne_children = c
242 , _ne_ngrams = ngrams
243 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
244 {-
245 -- Here we could use 0 if we want to avoid any `panic`.
246 -- It will not happen using getTableNgrams if
247 -- getOccByNgramsOnly provides a count of occurrences for
248 -- all the ngrams given.
249 -}
250 }
251
252 ------------------------------------------------------------------------
253 newtype NgramsTable = NgramsTable [NgramsElement]
254 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
255
256 type ListNgrams = NgramsTable
257
258 makePrisms ''NgramsTable
259
260 -- | Question: why these repetition of Type in this instance
261 -- may you document it please ?
262 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
263 each = _NgramsTable . each
264
265 -- TODO discuss
266 -- | TODO Check N and Weight
267 {-
268 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
269 toNgramsElement ns = map toNgramsElement' ns
270 where
271 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
272 where
273 p' = case p of
274 Nothing -> Nothing
275 Just x -> lookup x mapParent
276 c' = maybe mempty identity $ lookup t mapChildren
277 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
278
279 mapParent :: Map Int Text
280 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
281
282 mapChildren :: Map Text (Set Text)
283 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
284 $ Map.fromListWith (<>)
285 $ map (first fromJust)
286 $ filter (isJust . fst)
287 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
288 -}
289
290 mockTable :: NgramsTable
291 mockTable = NgramsTable
292 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
293 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
294 , mkNgramsElement "cats" StopTerm Nothing mempty
295 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
296 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
297 , mkNgramsElement "fox" GraphTerm Nothing mempty
298 , mkNgramsElement "object" CandidateTerm Nothing mempty
299 , mkNgramsElement "nothing" StopTerm Nothing mempty
300 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
301 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
302 , mkNgramsElement "moon" CandidateTerm Nothing mempty
303 , mkNgramsElement "sky" StopTerm Nothing mempty
304 ]
305 where
306 rp n = Just $ RootParent n n
307
308 instance Arbitrary NgramsTable where
309 arbitrary = pure mockTable
310
311 instance ToSchema NgramsTable
312
313 ------------------------------------------------------------------------
314 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
315
316 ------------------------------------------------------------------------
317 -- On the Client side:
318 --data Action = InGroup NgramsId NgramsId
319 -- | OutGroup NgramsId NgramsId
320 -- | SetListType NgramsId ListType
321
322 data PatchSet a = PatchSet
323 { _rem :: Set a
324 , _add :: Set a
325 }
326 deriving (Eq, Ord, Show, Generic)
327
328 makeLenses ''PatchSet
329 makePrisms ''PatchSet
330
331 instance ToJSON a => ToJSON (PatchSet a) where
332 toJSON = genericToJSON $ unPrefix "_"
333 toEncoding = genericToEncoding $ unPrefix "_"
334
335 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
336 parseJSON = genericParseJSON $ unPrefix "_"
337
338 {-
339 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
340 arbitrary = PatchSet <$> arbitrary <*> arbitrary
341
342 type instance Patched (PatchSet a) = Set a
343
344 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
345 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
346
347 instance Ord a => Semigroup (PatchSet a) where
348 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
349 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
350 } -- TODO Review
351
352 instance Ord a => Monoid (PatchSet a) where
353 mempty = PatchSet mempty mempty
354
355 instance Ord a => Group (PatchSet a) where
356 invert (PatchSet r a) = PatchSet a r
357
358 instance Ord a => Composable (PatchSet a) where
359 composable _ _ = undefined
360
361 instance Ord a => Action (PatchSet a) (Set a) where
362 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
363
364 instance Applicable (PatchSet a) (Set a) where
365 applicable _ _ = mempty
366
367 instance Ord a => Validity (PatchSet a) where
368 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
369
370 instance Ord a => Transformable (PatchSet a) where
371 transformable = undefined
372
373 conflicts _p _q = undefined
374
375 transformWith conflict p q = undefined conflict p q
376
377 instance ToSchema a => ToSchema (PatchSet a)
378 -}
379
380 type AddRem = Replace (Maybe ())
381
382 remPatch, addPatch :: AddRem
383 remPatch = replace (Just ()) Nothing
384 addPatch = replace Nothing (Just ())
385
386 isRem :: Replace (Maybe ()) -> Bool
387 isRem = (== remPatch)
388
389 type PatchMap = PM.PatchMap
390
391 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
392 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
393 Transformable, Composable)
394
395 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
396 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
397
398 -- TODO this breaks module abstraction
399 makePrisms ''PM.PatchMap
400
401 makePrisms ''PatchMSet
402
403 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
404 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
405 where
406 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
407 f = Map.partition isRem >>> both %~ Map.keysSet
408
409 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
410 g (rems, adds) = Map.fromSet (const remPatch) rems
411 <> Map.fromSet (const addPatch) adds
412
413 instance Ord a => Action (PatchMSet a) (MSet a) where
414 act (PatchMSet p) (MSet m) = MSet $ act p m
415
416 instance Ord a => Applicable (PatchMSet a) (MSet a) where
417 applicable (PatchMSet p) (MSet m) = applicable p m
418
419 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
420 toJSON = toJSON . view _PatchMSetIso
421 toEncoding = toEncoding . view _PatchMSetIso
422
423 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
424 parseJSON = fmap (_PatchMSetIso #) . parseJSON
425
426 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
427 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
428
429 instance ToSchema a => ToSchema (PatchMSet a) where
430 -- TODO
431 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
432
433 type instance Patched (PatchMSet a) = MSet a
434
435 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
436 arbitrary = uncurry replace <$> arbitrary
437 -- If they happen to be equal then the patch is Keep.
438
439 instance ToSchema a => ToSchema (Replace a) where
440 declareNamedSchema (_ :: proxy (Replace a)) = do
441 -- TODO Keep constructor is not supported here.
442 aSchema <- declareSchemaRef (Proxy :: Proxy a)
443 return $ NamedSchema (Just "Replace") $ mempty
444 & type_ .~ SwaggerObject
445 & properties .~
446 InsOrdHashMap.fromList
447 [ ("old", aSchema)
448 , ("new", aSchema)
449 ]
450 & required .~ [ "old", "new" ]
451
452 data NgramsPatch =
453 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
454 , _patch_list :: Replace ListType -- TODO Map UserId ListType
455 }
456 deriving (Eq, Show, Generic)
457
458 deriveJSON (unPrefix "_") ''NgramsPatch
459 makeLenses ''NgramsPatch
460
461 instance ToSchema NgramsPatch
462
463 instance Arbitrary NgramsPatch where
464 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
465
466 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
467
468 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
469 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
470
471 instance Semigroup NgramsPatch where
472 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
473
474 instance Monoid NgramsPatch where
475 mempty = _NgramsPatch # mempty
476
477 instance Validity NgramsPatch where
478 validate p = p ^. _NgramsPatch . to validate
479
480 instance Transformable NgramsPatch where
481 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
482
483 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
484
485 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
486 where
487 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
488
489 type ConflictResolutionNgramsPatch =
490 ( ConflictResolutionPatchMSet NgramsTerm
491 , ConflictResolutionReplace ListType
492 )
493 type instance ConflictResolution NgramsPatch =
494 ConflictResolutionNgramsPatch
495
496 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
497 -- ~ Patched NgramsPatchIso
498 type instance Patched NgramsPatch = PatchedNgramsPatch
499
500 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
501 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
502 applicable p (Just nre) =
503 applicable (p ^. patch_children) (nre ^. nre_children) <>
504 applicable (p ^. patch_list) (nre ^. nre_list)
505
506 instance Action NgramsPatch NgramsRepoElement where
507 act p = (nre_children %~ act (p ^. patch_children))
508 . (nre_list %~ act (p ^. patch_list))
509
510 instance Action NgramsPatch (Maybe NgramsRepoElement) where
511 act = fmap . act
512
513 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
514 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
515
516 instance FromField NgramsTablePatch
517 where
518 fromField = fromField'
519
520 instance FromField (PatchMap NgramsType (PatchMap NodeId NgramsTablePatch))
521 where
522 fromField = fromField'
523
524 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
525 --
526 type instance ConflictResolution NgramsTablePatch =
527 NgramsTerm -> ConflictResolutionNgramsPatch
528
529 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
530 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
531 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
532
533 makePrisms ''NgramsTablePatch
534 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
535 instance ToSchema NgramsTablePatch
536
537 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
538 applicable p = applicable (p ^. _NgramsTablePatch)
539
540 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
541 act p =
542 fmap (execState (reParentNgramsTablePatch p)) .
543 act (p ^. _NgramsTablePatch)
544
545 instance Arbitrary NgramsTablePatch where
546 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
547
548 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
549 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
550 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
551
552 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
553
554 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
555 reRootChildren root ngram = do
556 nre <- use $ at ngram
557 forOf_ (_Just . nre_children . folded) nre $ \child -> do
558 at child . _Just . nre_root ?= root
559 reRootChildren root child
560
561 reParent :: Maybe RootParent -> ReParent NgramsTerm
562 reParent rp child = do
563 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
564 . (nre_root .~ (_rp_root <$> rp))
565 )
566 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
567
568 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
569 reParentAddRem rp child p =
570 reParent (if isRem p then Nothing else Just rp) child
571
572 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
573 reParentNgramsPatch parent ngramsPatch = do
574 root_of_parent <- use (at parent . _Just . nre_root)
575 let
576 root = fromMaybe parent root_of_parent
577 rp = RootParent { _rp_root = root, _rp_parent = parent }
578 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
579 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
580
581 reParentNgramsTablePatch :: ReParent NgramsTablePatch
582 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
583 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
584
585 ------------------------------------------------------------------------
586 ------------------------------------------------------------------------
587 type Version = Int
588
589 data Versioned a = Versioned
590 { _v_version :: Version
591 , _v_data :: a
592 }
593 deriving (Generic, Show)
594 deriveJSON (unPrefix "_v_") ''Versioned
595 makeLenses ''Versioned
596 instance ToSchema a => ToSchema (Versioned a)
597 instance Arbitrary a => Arbitrary (Versioned a) where
598 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
599
600 {-
601 -- TODO sequencs of modifications (Patchs)
602 type NgramsIdPatch = Patch NgramsId NgramsPatch
603
604 ngramsPatch :: Int -> NgramsPatch
605 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
606
607 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
608 toEdit n p = Edit n p
609 ngramsIdPatch :: Patch NgramsId NgramsPatch
610 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
611 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
612 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
613 ]
614
615 -- applyPatchBack :: Patch -> IO Patch
616 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
617 -}
618 ------------------------------------------------------------------------
619 ------------------------------------------------------------------------
620 ------------------------------------------------------------------------
621
622 {-
623 -- TODO: Replace.old is ignored which means that if the current list
624 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
625 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
626 -- However this should not happen in non conflicting situations.
627 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
628 mkListsUpdate nt patches =
629 [ (ngramsTypeId nt, ng, listTypeId lt)
630 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
631 , lt <- patch ^.. patch_list . new
632 ]
633
634 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
635 -> NgramsType
636 -> NgramsTablePatch
637 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
638 mkChildrenGroups addOrRem nt patches =
639 [ (ngramsTypeId nt, parent, child)
640 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
641 , child <- patch ^.. patch_children . to addOrRem . folded
642 ]
643 -}
644
645 ngramsTypeFromTabType :: TabType -> NgramsType
646 ngramsTypeFromTabType tabType =
647 let lieu = "Garg.API.Ngrams: " :: Text in
648 case tabType of
649 Sources -> Ngrams.Sources
650 Authors -> Ngrams.Authors
651 Institutes -> Ngrams.Institutes
652 Terms -> Ngrams.NgramsTerms
653 _ -> panic $ lieu <> "No Ngrams for this tab"
654 -- TODO: This `panic` would disapear with custom NgramsType.
655
656 ------------------------------------------------------------------------
657 data Repo s p = Repo
658 { _r_version :: Version
659 , _r_state :: s
660 , _r_history :: [p]
661 -- first patch in the list is the most recent
662 }
663 deriving (Generic)
664
665 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
666 parseJSON = genericParseJSON $ unPrefix "_r_"
667
668 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
669 toJSON = genericToJSON $ unPrefix "_r_"
670 toEncoding = genericToEncoding $ unPrefix "_r_"
671
672 makeLenses ''Repo
673
674 initRepo :: Monoid s => Repo s p
675 initRepo = Repo 1 mempty []
676
677 type NgramsRepo = Repo NgramsState NgramsStatePatch
678 type NgramsState = Map NgramsType (Map NodeId NgramsTableMap)
679 type NgramsStatePatch = PatchMap NgramsType (PatchMap NodeId NgramsTablePatch)
680
681 initMockRepo :: NgramsRepo
682 initMockRepo = Repo 1 s []
683 where
684 s = Map.singleton Ngrams.NgramsTerms
685 $ Map.singleton 47254
686 $ Map.fromList
687 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
688
689 data RepoEnv = RepoEnv
690 { _renv_var :: !(MVar NgramsRepo)
691 , _renv_saver :: !(IO ())
692 , _renv_lock :: !FileLock
693 }
694 deriving (Generic)
695
696 makeLenses ''RepoEnv
697
698 class HasRepoVar env where
699 repoVar :: Getter env (MVar NgramsRepo)
700
701 instance HasRepoVar (MVar NgramsRepo) where
702 repoVar = identity
703
704 class HasRepoSaver env where
705 repoSaver :: Getter env (IO ())
706
707 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
708 repoEnv :: Getter env RepoEnv
709
710 instance HasRepo RepoEnv where
711 repoEnv = identity
712
713 instance HasRepoVar RepoEnv where
714 repoVar = renv_var
715
716 instance HasRepoSaver RepoEnv where
717 repoSaver = renv_saver
718
719 type RepoCmdM env err m =
720 ( MonadReader env m
721 , MonadError err m
722 , MonadIO m
723 , HasRepo env
724 )
725 ------------------------------------------------------------------------
726
727 saveRepo :: ( MonadReader env m, MonadIO m, HasRepoSaver env )
728 => m ()
729 saveRepo = liftIO =<< view repoSaver
730
731 listTypeConflictResolution :: ListType -> ListType -> ListType
732 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
733
734 ngramsStatePatchConflictResolution
735 :: NgramsType -> NodeId -> NgramsTerm
736 -> ConflictResolutionNgramsPatch
737 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
738 = (const ours, ours)
739 -- undefined {- TODO think this through -}, listTypeConflictResolution)
740
741 -- Current state:
742 -- Insertions are not considered as patches,
743 -- they do not extend history,
744 -- they do not bump version.
745 insertNewOnly :: a -> Maybe b -> a
746 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
747 -- TODO error handling
748
749 something :: Monoid a => Maybe a -> a
750 something Nothing = mempty
751 something (Just a) = a
752
753 {- unused
754 -- TODO refactor with putListNgrams
755 copyListNgrams :: RepoCmdM env err m
756 => NodeId -> NodeId -> NgramsType
757 -> m ()
758 copyListNgrams srcListId dstListId ngramsType = do
759 var <- view repoVar
760 liftIO $ modifyMVar_ var $
761 pure . (r_state . at ngramsType %~ (Just . f . something))
762 saveRepo
763 where
764 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
765 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
766
767 -- TODO refactor with putListNgrams
768 -- The list must be non-empty!
769 -- The added ngrams must be non-existent!
770 addListNgrams :: RepoCmdM env err m
771 => NodeId -> NgramsType
772 -> [NgramsElement] -> m ()
773 addListNgrams listId ngramsType nes = do
774 var <- view repoVar
775 liftIO $ modifyMVar_ var $
776 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
777 saveRepo
778 where
779 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
780 -}
781
782 -- If the given list of ngrams elements contains ngrams already in
783 -- the repo, they will be ignored.
784 putListNgrams :: RepoCmdM env err m
785 => NodeId -> NgramsType
786 -> [NgramsElement] -> m ()
787 putListNgrams _ _ [] = pure ()
788 putListNgrams listId ngramsType nes = do
789 -- printDebug "putListNgrams" (length nes)
790 var <- view repoVar
791 liftIO $ modifyMVar_ var $
792 pure . (r_state . at ngramsType %~ (Just . (at listId %~ (Just . (<> m) . something)) . something))
793 saveRepo
794 where
795 m = Map.fromList $ (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) <$> nes
796
797 tableNgramsPost :: RepoCmdM env err m => TabType -> NodeId -> [NgramsTerm] -> m ()
798 tableNgramsPost tabType listId =
799 putListNgrams listId (ngramsTypeFromTabType tabType) . fmap newNgramsElement
800
801 -- Apply the given patch to the DB and returns the patch to be applied on the
802 -- client.
803 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
804 => TabType -> ListId
805 -> Versioned NgramsTablePatch
806 -> m (Versioned NgramsTablePatch)
807 tableNgramsPut tabType listId (Versioned p_version p_table)
808 | p_table == mempty = do
809 let ngramsType = ngramsTypeFromTabType tabType
810
811 var <- view repoVar
812 r <- liftIO $ readMVar var
813
814 let
815 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
816 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
817
818 pure (Versioned (r ^. r_version) q_table)
819
820 | otherwise = do
821 let ngramsType = ngramsTypeFromTabType tabType
822 (p0, p0_validity) = PM.singleton listId p_table
823 (p, p_validity) = PM.singleton ngramsType p0
824
825 assertValid p0_validity
826 assertValid p_validity
827
828 var <- view repoVar
829 vq' <- liftIO $ modifyMVar var $ \r -> do
830 let
831 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
832 (p', q') = transformWith ngramsStatePatchConflictResolution p q
833 r' = r & r_version +~ 1
834 & r_state %~ act p'
835 & r_history %~ (p' :)
836 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
837 {-
838 -- Ideally we would like to check these properties. However:
839 -- * They should be checked only to debug the code. The client data
840 -- should be able to trigger these.
841 -- * What kind of error should they throw (we are in IO here)?
842 -- * Should we keep modifyMVar?
843 -- * Should we throw the validation in an Exception, catch it around
844 -- modifyMVar and throw it back as an Error?
845 assertValid $ transformable p q
846 assertValid $ applicable p' (r ^. r_state)
847 -}
848 pure (r', Versioned (r' ^. r_version) q'_table)
849
850 saveRepo
851 pure vq'
852
853 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
854 mergeNgramsElement _neOld neNew = neNew
855 {-
856 { _ne_list :: ListType
857 If we merge the parents/children we can potentially create cycles!
858 , _ne_parent :: Maybe NgramsTerm
859 , _ne_children :: MSet NgramsTerm
860 }
861 -}
862
863 getNgramsTableMap :: RepoCmdM env err m
864 => NodeId -> NgramsType -> m (Versioned NgramsTableMap)
865 getNgramsTableMap nodeId ngramsType = do
866 v <- view repoVar
867 repo <- liftIO $ readMVar v
868 pure $ Versioned (repo ^. r_version)
869 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
870
871 type MinSize = Int
872 type MaxSize = Int
873
874 -- | TODO Errors management
875 -- TODO: polymorphic for Annuaire or Corpus or ...
876 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
877 -- TODO: should take only one ListId
878
879
880
881
882 getTableNgrams :: forall env err m.
883 (RepoCmdM env err m, HasNodeError err, HasConnection env)
884 => NodeType -> NodeId -> TabType
885 -> ListId -> Limit -> Maybe Offset
886 -> Maybe ListType
887 -> Maybe MinSize -> Maybe MaxSize
888 -> Maybe OrderBy
889 -> (NgramsTerm -> Bool)
890 -> m (Versioned NgramsTable)
891 getTableNgrams nType nId tabType listId limit_ offset
892 listType minSize maxSize orderBy searchQuery = do
893
894 lIds <- selectNodesWithUsername NodeList userMaster
895 let
896 ngramsType = ngramsTypeFromTabType tabType
897 offset' = maybe 0 identity offset
898 listType' = maybe (const True) (==) listType
899 minSize' = maybe (const True) (<=) minSize
900 maxSize' = maybe (const True) (>=) maxSize
901
902 selected_node n = minSize' s
903 && maxSize' s
904 && searchQuery (n ^. ne_ngrams)
905 && listType' (n ^. ne_list)
906 where
907 s = n ^. ne_size
908
909 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
910
911 ---------------------------------------
912 sortOnOrder Nothing = identity
913 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
914 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
915 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
916 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
917
918 ---------------------------------------
919 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
920 selectAndPaginate tableMap = roots <> inners
921 where
922 list = tableMap ^.. each
923 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
924 (ne ^. ne_root)
925 selected_nodes = list & take limit_
926 . drop offset'
927 . filter selected_node
928 . sortOnOrder orderBy
929 roots = rootOf <$> selected_nodes
930 rootsSet = Set.fromList (_ne_ngrams <$> roots)
931 inners = list & filter (selected_inner rootsSet)
932
933 ---------------------------------------
934 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
935 setScores False table = pure table
936 setScores True table = do
937 let ngrams_terms = (table ^.. each . ne_ngrams)
938 occurrences <- getOccByNgramsOnlySlow nType nId
939 (lIds <> [listId])
940 ngramsType
941 ngrams_terms
942
943 let
944 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
945
946 pure $ table & each %~ setOcc
947 ---------------------------------------
948
949 -- lists <- catMaybes <$> listsWith userMaster
950 -- trace (show lists) $
951 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
952
953 let nSco = needsScores orderBy
954 tableMap1 <- getNgramsTableMap listId ngramsType
955 tableMap2 <- tableMap1 & v_data %%~ setScores nSco
956 . Map.mapWithKey ngramsElementFromRepo
957 tableMap2 & v_data %%~ fmap NgramsTable
958 . setScores (not nSco)
959 . selectAndPaginate
960
961 -- APIs
962
963 -- TODO: find a better place for the code above, All APIs stay here
964 type QueryParamR = QueryParam' '[Required, Strict]
965
966
967 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
968 deriving (Generic, Enum, Bounded, Read, Show)
969
970 instance FromHttpApiData OrderBy
971 where
972 parseUrlPiece "TermAsc" = pure TermAsc
973 parseUrlPiece "TermDesc" = pure TermDesc
974 parseUrlPiece "ScoreAsc" = pure ScoreAsc
975 parseUrlPiece "ScoreDesc" = pure ScoreDesc
976 parseUrlPiece _ = Left "Unexpected value of OrderBy"
977
978 instance ToParamSchema OrderBy
979 instance FromJSON OrderBy
980 instance ToJSON OrderBy
981 instance ToSchema OrderBy
982 instance Arbitrary OrderBy
983 where
984 arbitrary = elements [minBound..maxBound]
985
986 needsScores :: Maybe OrderBy -> Bool
987 needsScores (Just ScoreAsc) = True
988 needsScores (Just ScoreDesc) = True
989 needsScores _ = False
990
991 type TableNgramsApiGet = Summary " Table Ngrams API Get"
992 :> QueryParamR "ngramsType" TabType
993 :> QueryParamR "list" ListId
994 :> QueryParamR "limit" Limit
995 :> QueryParam "offset" Offset
996 :> QueryParam "listType" ListType
997 :> QueryParam "minTermSize" MinSize
998 :> QueryParam "maxTermSize" MaxSize
999 :> QueryParam "orderBy" OrderBy
1000 :> QueryParam "search" Text
1001 :> Get '[JSON] (Versioned NgramsTable)
1002
1003 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1004 :> QueryParamR "ngramsType" TabType
1005 :> QueryParamR "list" ListId
1006 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1007 :> Put '[JSON] (Versioned NgramsTablePatch)
1008
1009 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1010 :> QueryParamR "ngramsType" TabType
1011 :> QueryParamR "list" ListId
1012 :> ReqBody '[JSON] [NgramsTerm]
1013 :> Post '[JSON] ()
1014
1015 type TableNgramsApi = TableNgramsApiGet
1016 :<|> TableNgramsApiPut
1017 :<|> TableNgramsApiPost
1018
1019 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1020 => NodeId -> TabType
1021 -> ListId -> Limit -> Maybe Offset
1022 -> Maybe ListType
1023 -> Maybe MinSize -> Maybe MaxSize
1024 -> Maybe OrderBy
1025 -> Maybe Text -- full text search
1026 -> m (Versioned NgramsTable)
1027 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1028 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1029 where
1030 searchQuery = maybe (const True) isInfixOf mt
1031
1032 -- | Text search is deactivated for now for ngrams by doc only
1033 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1034 => DocId -> TabType
1035 -> ListId -> Limit -> Maybe Offset
1036 -> Maybe ListType
1037 -> Maybe MinSize -> Maybe MaxSize
1038 -> Maybe OrderBy
1039 -> Maybe Text -- full text search
1040 -> m (Versioned NgramsTable)
1041 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1042 ns <- selectNodesWithUsername NodeList userMaster
1043 let ngramsType = ngramsTypeFromTabType tabType
1044 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1045 let searchQuery = flip S.member (S.fromList ngs)
1046 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1047
1048
1049
1050
1051
1052 apiNgramsTableCorpus :: ( RepoCmdM env err m
1053 , HasNodeError err
1054 , HasInvalidError err
1055 , HasConnection env
1056 )
1057 => NodeId -> ServerT TableNgramsApi m
1058 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1059 :<|> tableNgramsPut
1060 :<|> tableNgramsPost
1061
1062
1063 apiNgramsTableDoc :: ( RepoCmdM env err m
1064 , HasNodeError err
1065 , HasInvalidError err
1066 , HasConnection env
1067 )
1068 => DocId -> ServerT TableNgramsApi m
1069 apiNgramsTableDoc dId = getTableNgramsDoc dId
1070 :<|> tableNgramsPut
1071 :<|> tableNgramsPost
1072 -- > add new ngrams in database (TODO AD)
1073 -- > index all the corpus accordingly (TODO AD)
1074