]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[PARSERS] RIS/PRESSE fix title and abstract field.
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# LANGUAGE ConstraintKinds #-}
19 {-# LANGUAGE DataKinds #-}
20 {-# LANGUAGE DeriveGeneric #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE ScopedTypeVariables #-}
24 {-# LANGUAGE TemplateHaskell #-}
25 {-# LANGUAGE TypeOperators #-}
26 {-# LANGUAGE FlexibleContexts #-}
27 {-# LANGUAGE FlexibleInstances #-}
28 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
29 {-# LANGUAGE MultiParamTypeClasses #-}
30 {-# LANGUAGE RankNTypes #-}
31 {-# LANGUAGE TypeFamilies #-}
32 {-# OPTIONS -fno-warn-orphans #-}
33
34 module Gargantext.API.Ngrams
35 where
36
37 -- import Debug.Trace (trace)
38 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
39 -- import Gargantext.Database.Schema.User (UserId)
40 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..),
41 Composable(..), Transformable(..),
42 PairPatch(..), Patched, ConflictResolution,
43 ConflictResolutionReplace, ours)
44 import qualified Data.Map.Strict.Patch as PM
45 import Data.Monoid
46 import Data.Foldable
47 --import Data.Semigroup
48 import Data.Set (Set)
49 import qualified Data.Set as S
50 -- import qualified Data.List as List
51 import Data.Maybe (fromMaybe)
52 -- import Data.Tuple.Extra (first)
53 import qualified Data.Map.Strict as Map
54 import Data.Map.Strict (Map)
55 import qualified Data.Set as Set
56 import Control.Category ((>>>))
57 import Control.Concurrent
58 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, mapped, forOf_)
59 import Control.Monad.Error.Class (MonadError)
60 import Control.Monad.Reader
61 import Control.Monad.State
62 import Data.Aeson hiding ((.=))
63 import Data.Aeson.TH (deriveJSON)
64 import Data.Either(Either(Left))
65 -- import Data.Map (lookup)
66 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
67 import Data.Swagger hiding (version, patch)
68 import Data.Text (Text, isInfixOf, count)
69 import Data.Validity
70 import GHC.Generics (Generic)
71 import Gargantext.Core.Utils.Prefix (unPrefix)
72 -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
73 import Gargantext.Database.Config (userMaster)
74 import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlySafe)
75 import Gargantext.Database.Schema.Ngrams (NgramsType)
76 import Gargantext.Database.Types.Node (NodeType(..))
77 import Gargantext.Database.Utils (fromField', HasConnection)
78 import Gargantext.Database.Node.Select
79 import Gargantext.Database.Ngrams
80 --import Gargantext.Database.Lists (listsWith)
81 import Gargantext.Database.Schema.Node (HasNodeError)
82 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
83 import qualified Gargantext.Database.Schema.Ngrams as Ngrams
84 -- import Gargantext.Database.Schema.NodeNgram hiding (Action)
85 import Gargantext.Prelude
86 -- import Gargantext.Core.Types (ListTypeId, listTypeId)
87 import Gargantext.Core.Types (ListType(..), NodeId, ListId, CorpusId, DocId, Limit, Offset, HasInvalidError, assertValid)
88 import Servant hiding (Patch)
89 import System.FileLock (FileLock)
90 import Test.QuickCheck (elements)
91 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
92
93 data TODO = TODO
94 deriving (Generic)
95
96 instance ToSchema TODO where
97
98 ------------------------------------------------------------------------
99 --data FacetFormat = Table | Chart
100 data TabType = Docs | Terms | Sources | Authors | Institutes | Trash
101 | Contacts
102 deriving (Generic, Enum, Bounded)
103
104 instance FromHttpApiData TabType
105 where
106 parseUrlPiece "Docs" = pure Docs
107 parseUrlPiece "Terms" = pure Terms
108 parseUrlPiece "Sources" = pure Sources
109 parseUrlPiece "Institutes" = pure Institutes
110 parseUrlPiece "Authors" = pure Authors
111 parseUrlPiece "Trash" = pure Trash
112
113 parseUrlPiece "Contacts" = pure Contacts
114
115 parseUrlPiece _ = Left "Unexpected value of TabType"
116
117 instance ToParamSchema TabType
118 instance ToJSON TabType
119 instance FromJSON TabType
120 instance ToSchema TabType
121 instance Arbitrary TabType
122 where
123 arbitrary = elements [minBound .. maxBound]
124
125 newtype MSet a = MSet (Map a ())
126 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
127
128 instance ToJSON a => ToJSON (MSet a) where
129 toJSON (MSet m) = toJSON (Map.keys m)
130 toEncoding (MSet m) = toEncoding (Map.keys m)
131
132 mSetFromSet :: Set a -> MSet a
133 mSetFromSet = MSet . Map.fromSet (const ())
134
135 mSetFromList :: Ord a => [a] -> MSet a
136 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
137
138 -- mSetToSet :: Ord a => MSet a -> Set a
139 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
140 mSetToSet :: Ord a => MSet a -> Set a
141 mSetToSet = Set.fromList . mSetToList
142
143 mSetToList :: MSet a -> [a]
144 mSetToList (MSet a) = Map.keys a
145
146 instance Foldable MSet where
147 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
148
149 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
150 parseJSON = fmap mSetFromList . parseJSON
151
152 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
153 -- TODO
154 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
155
156 ------------------------------------------------------------------------
157 type NgramsTerm = Text
158
159 data RootParent = RootParent
160 { _rp_root :: NgramsTerm
161 , _rp_parent :: NgramsTerm
162 }
163 deriving (Ord, Eq, Show, Generic)
164
165 deriveJSON (unPrefix "_rp_") ''RootParent
166 makeLenses ''RootParent
167
168 data NgramsRepoElement = NgramsRepoElement
169 { _nre_size :: Int
170 , _nre_list :: ListType
171 --, _nre_root_parent :: Maybe RootParent
172 , _nre_root :: Maybe NgramsTerm
173 , _nre_parent :: Maybe NgramsTerm
174 , _nre_children :: MSet NgramsTerm
175 }
176 deriving (Ord, Eq, Show, Generic)
177
178 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
179 makeLenses ''NgramsRepoElement
180
181 data NgramsElement =
182 NgramsElement { _ne_ngrams :: NgramsTerm
183 , _ne_size :: Int
184 , _ne_list :: ListType
185 , _ne_occurrences :: Int
186 , _ne_root :: Maybe NgramsTerm
187 , _ne_parent :: Maybe NgramsTerm
188 , _ne_children :: MSet NgramsTerm
189 }
190 deriving (Ord, Eq, Show, Generic)
191
192 deriveJSON (unPrefix "_ne_") ''NgramsElement
193 makeLenses ''NgramsElement
194
195 mkNgramsElement :: NgramsTerm -> ListType -> Maybe RootParent -> MSet NgramsTerm -> NgramsElement
196 mkNgramsElement ngrams list rp children =
197 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
198 where
199 -- TODO review
200 size = 1 + count " " ngrams
201
202 instance ToSchema NgramsElement
203 instance Arbitrary NgramsElement where
204 arbitrary = elements [mkNgramsElement "sport" GraphTerm Nothing mempty]
205
206 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
207 ngramsElementToRepo
208 (NgramsElement { _ne_size = s
209 , _ne_list = l
210 , _ne_root = r
211 , _ne_parent = p
212 , _ne_children = c
213 }) =
214 NgramsRepoElement
215 { _nre_size = s
216 , _nre_list = l
217 , _nre_parent = p
218 , _nre_root = r
219 , _nre_children = c
220 }
221
222 ngramsElementFromRepo :: (NgramsTerm, NgramsRepoElement) -> NgramsElement
223 ngramsElementFromRepo
224 (ngrams,
225 NgramsRepoElement
226 { _nre_size = s
227 , _nre_list = l
228 , _nre_parent = p
229 , _nre_root = r
230 , _nre_children = c
231 }) =
232 NgramsElement { _ne_size = s
233 , _ne_list = l
234 , _ne_root = r
235 , _ne_parent = p
236 , _ne_children = c
237 , _ne_ngrams = ngrams
238 , _ne_occurrences = panic "API.Ngrams._ne_occurrences"
239 -- ^ Here we could use 0 if we want to avoid any `panic`.
240 -- It will not happen using getTableNgrams if
241 -- getOccByNgramsOnly provides a count of occurrences for
242 -- all the ngrams given.
243 }
244
245 ------------------------------------------------------------------------
246 newtype NgramsTable = NgramsTable [NgramsElement]
247 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
248
249 type ListNgrams = NgramsTable
250
251 makePrisms ''NgramsTable
252
253 -- | Question: why these repetition of Type in this instance
254 -- may you document it please ?
255 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
256 each = _NgramsTable . each
257
258 -- TODO discuss
259 -- | TODO Check N and Weight
260 {-
261 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
262 toNgramsElement ns = map toNgramsElement' ns
263 where
264 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
265 where
266 p' = case p of
267 Nothing -> Nothing
268 Just x -> lookup x mapParent
269 c' = maybe mempty identity $ lookup t mapChildren
270 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
271
272 mapParent :: Map Int Text
273 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
274
275 mapChildren :: Map Text (Set Text)
276 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
277 $ Map.fromListWith (<>)
278 $ map (first fromJust)
279 $ filter (isJust . fst)
280 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
281 -}
282
283 mockTable :: NgramsTable
284 mockTable = NgramsTable
285 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
286 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
287 , mkNgramsElement "cats" StopTerm Nothing mempty
288 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
289 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
290 , mkNgramsElement "fox" GraphTerm Nothing mempty
291 , mkNgramsElement "object" CandidateTerm Nothing mempty
292 , mkNgramsElement "nothing" StopTerm Nothing mempty
293 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
294 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
295 , mkNgramsElement "moon" CandidateTerm Nothing mempty
296 , mkNgramsElement "sky" StopTerm Nothing mempty
297 ]
298 where
299 rp n = Just $ RootParent n n
300
301 instance Arbitrary NgramsTable where
302 arbitrary = pure mockTable
303
304 instance ToSchema NgramsTable
305
306 ------------------------------------------------------------------------
307 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
308
309 ------------------------------------------------------------------------
310 -- On the Client side:
311 --data Action = InGroup NgramsId NgramsId
312 -- | OutGroup NgramsId NgramsId
313 -- | SetListType NgramsId ListType
314
315 data PatchSet a = PatchSet
316 { _rem :: Set a
317 , _add :: Set a
318 }
319 deriving (Eq, Ord, Show, Generic)
320
321 makeLenses ''PatchSet
322 makePrisms ''PatchSet
323
324 instance ToJSON a => ToJSON (PatchSet a) where
325 toJSON = genericToJSON $ unPrefix "_"
326 toEncoding = genericToEncoding $ unPrefix "_"
327
328 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
329 parseJSON = genericParseJSON $ unPrefix "_"
330
331 {-
332 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
333 arbitrary = PatchSet <$> arbitrary <*> arbitrary
334
335 type instance Patched (PatchSet a) = Set a
336
337 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
338 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
339
340 instance Ord a => Semigroup (PatchSet a) where
341 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
342 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
343 } -- TODO Review
344
345 instance Ord a => Monoid (PatchSet a) where
346 mempty = PatchSet mempty mempty
347
348 instance Ord a => Group (PatchSet a) where
349 invert (PatchSet r a) = PatchSet a r
350
351 instance Ord a => Composable (PatchSet a) where
352 composable _ _ = undefined
353
354 instance Ord a => Action (PatchSet a) (Set a) where
355 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
356
357 instance Applicable (PatchSet a) (Set a) where
358 applicable _ _ = mempty
359
360 instance Ord a => Validity (PatchSet a) where
361 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
362
363 instance Ord a => Transformable (PatchSet a) where
364 transformable = undefined
365
366 conflicts _p _q = undefined
367
368 transformWith conflict p q = undefined conflict p q
369
370 instance ToSchema a => ToSchema (PatchSet a)
371 -}
372
373 type AddRem = Replace (Maybe ())
374
375 remPatch, addPatch :: AddRem
376 remPatch = replace (Just ()) Nothing
377 addPatch = replace Nothing (Just ())
378
379 isRem :: Replace (Maybe ()) -> Bool
380 isRem = (== remPatch)
381
382 type PatchMap = PM.PatchMap
383
384 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
385 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
386 Transformable, Composable)
387
388 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
389 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
390
391 -- TODO this breaks module abstraction
392 makePrisms ''PM.PatchMap
393
394 makePrisms ''PatchMSet
395
396 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
397 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
398 where
399 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
400 f = Map.partition isRem >>> both %~ Map.keysSet
401
402 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
403 g (rems, adds) = Map.fromSet (const remPatch) rems
404 <> Map.fromSet (const addPatch) adds
405
406 instance Ord a => Action (PatchMSet a) (MSet a) where
407 act (PatchMSet p) (MSet m) = MSet $ act p m
408
409 instance Ord a => Applicable (PatchMSet a) (MSet a) where
410 applicable (PatchMSet p) (MSet m) = applicable p m
411
412 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
413 toJSON = toJSON . view _PatchMSetIso
414 toEncoding = toEncoding . view _PatchMSetIso
415
416 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
417 parseJSON = fmap (_PatchMSetIso #) . parseJSON
418
419 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
420 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
421
422 instance ToSchema a => ToSchema (PatchMSet a) where
423 -- TODO
424 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
425
426 type instance Patched (PatchMSet a) = MSet a
427
428 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
429 arbitrary = uncurry replace <$> arbitrary
430 -- If they happen to be equal then the patch is Keep.
431
432 instance ToSchema a => ToSchema (Replace a) where
433 declareNamedSchema (_ :: proxy (Replace a)) = do
434 -- TODO Keep constructor is not supported here.
435 aSchema <- declareSchemaRef (Proxy :: Proxy a)
436 return $ NamedSchema (Just "Replace") $ mempty
437 & type_ .~ SwaggerObject
438 & properties .~
439 InsOrdHashMap.fromList
440 [ ("old", aSchema)
441 , ("new", aSchema)
442 ]
443 & required .~ [ "old", "new" ]
444
445 data NgramsPatch =
446 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
447 , _patch_list :: Replace ListType -- TODO Map UserId ListType
448 }
449 deriving (Eq, Show, Generic)
450
451 deriveJSON (unPrefix "_") ''NgramsPatch
452 makeLenses ''NgramsPatch
453
454 instance ToSchema NgramsPatch
455
456 instance Arbitrary NgramsPatch where
457 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
458
459 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
460
461 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
462 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
463
464 instance Semigroup NgramsPatch where
465 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
466
467 instance Monoid NgramsPatch where
468 mempty = _NgramsPatch # mempty
469
470 instance Validity NgramsPatch where
471 validate p = p ^. _NgramsPatch . to validate
472
473 instance Transformable NgramsPatch where
474 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
475
476 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
477
478 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
479 where
480 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
481
482 type ConflictResolutionNgramsPatch =
483 ( ConflictResolutionPatchMSet NgramsTerm
484 , ConflictResolutionReplace ListType
485 )
486 type instance ConflictResolution NgramsPatch =
487 ConflictResolutionNgramsPatch
488
489 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
490 -- ~ Patched NgramsPatchIso
491 type instance Patched NgramsPatch = PatchedNgramsPatch
492
493 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
494 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
495 applicable p (Just nre) =
496 applicable (p ^. patch_children) (nre ^. nre_children) <>
497 applicable (p ^. patch_list) (nre ^. nre_list)
498
499 instance Action NgramsPatch NgramsRepoElement where
500 act p = (nre_children %~ act (p ^. patch_children))
501 . (nre_list %~ act (p ^. patch_list))
502
503 instance Action NgramsPatch (Maybe NgramsRepoElement) where
504 act = fmap . act
505
506 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
507 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
508
509 instance FromField NgramsTablePatch
510 where
511 fromField = fromField'
512
513 instance FromField (PatchMap NgramsType (PatchMap NodeId NgramsTablePatch))
514 where
515 fromField = fromField'
516
517 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
518 --
519 type instance ConflictResolution NgramsTablePatch =
520 NgramsTerm -> ConflictResolutionNgramsPatch
521
522 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
523 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
524 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
525
526 makePrisms ''NgramsTablePatch
527 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
528 instance ToSchema NgramsTablePatch
529
530 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
531 applicable p = applicable (p ^. _NgramsTablePatch)
532
533 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
534 act p =
535 fmap (execState (reParentNgramsTablePatch p)) .
536 act (p ^. _NgramsTablePatch)
537
538 instance Arbitrary NgramsTablePatch where
539 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
540
541 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
542 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
543 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
544
545 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
546
547 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
548 reRootChildren root ngram = do
549 nre <- use $ at ngram
550 forOf_ (_Just . nre_children . folded) nre $ \child -> do
551 at child . _Just . nre_root ?= root
552 reRootChildren root child
553
554 reParent :: Maybe RootParent -> ReParent NgramsTerm
555 reParent rp child = do
556 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
557 . (nre_root .~ (_rp_root <$> rp))
558 )
559 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
560
561 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
562 reParentAddRem rp child p =
563 reParent (if isRem p then Nothing else Just rp) child
564
565 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
566 reParentNgramsPatch parent ngramsPatch = do
567 root_of_parent <- use (at parent . _Just . nre_root)
568 let
569 root = fromMaybe parent root_of_parent
570 rp = RootParent { _rp_root = root, _rp_parent = parent }
571 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
572 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
573
574 reParentNgramsTablePatch :: ReParent NgramsTablePatch
575 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
576 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
577
578 ------------------------------------------------------------------------
579 ------------------------------------------------------------------------
580 type Version = Int
581
582 data Versioned a = Versioned
583 { _v_version :: Version
584 , _v_data :: a
585 }
586 deriving (Generic, Show)
587 deriveJSON (unPrefix "_v_") ''Versioned
588 makeLenses ''Versioned
589 instance ToSchema a => ToSchema (Versioned a)
590 instance Arbitrary a => Arbitrary (Versioned a) where
591 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
592
593 {-
594 -- TODO sequencs of modifications (Patchs)
595 type NgramsIdPatch = Patch NgramsId NgramsPatch
596
597 ngramsPatch :: Int -> NgramsPatch
598 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
599
600 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
601 toEdit n p = Edit n p
602 ngramsIdPatch :: Patch NgramsId NgramsPatch
603 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
604 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
605 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
606 ]
607
608 -- applyPatchBack :: Patch -> IO Patch
609 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
610 -}
611 ------------------------------------------------------------------------
612 ------------------------------------------------------------------------
613 ------------------------------------------------------------------------
614
615 {-
616 -- TODO: Replace.old is ignored which means that if the current list
617 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
618 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
619 -- However this should not happen in non conflicting situations.
620 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
621 mkListsUpdate nt patches =
622 [ (ngramsTypeId nt, ng, listTypeId lt)
623 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
624 , lt <- patch ^.. patch_list . new
625 ]
626
627 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
628 -> NgramsType
629 -> NgramsTablePatch
630 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
631 mkChildrenGroups addOrRem nt patches =
632 [ (ngramsTypeId nt, parent, child)
633 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
634 , child <- patch ^.. patch_children . to addOrRem . folded
635 ]
636 -}
637
638 ngramsTypeFromTabType :: TabType -> NgramsType
639 ngramsTypeFromTabType tabType =
640 let lieu = "Garg.API.Ngrams: " :: Text in
641 case tabType of
642 Sources -> Ngrams.Sources
643 Authors -> Ngrams.Authors
644 Institutes -> Ngrams.Institutes
645 Terms -> Ngrams.NgramsTerms
646 _ -> panic $ lieu <> "No Ngrams for this tab"
647 -- ^ TODO: This `panic` would disapear with custom NgramsType.
648
649 ------------------------------------------------------------------------
650 data Repo s p = Repo
651 { _r_version :: Version
652 , _r_state :: s
653 , _r_history :: [p]
654 -- ^ first patch in the list is the most recent
655 }
656 deriving (Generic)
657
658 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
659 parseJSON = genericParseJSON $ unPrefix "_r_"
660
661 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
662 toJSON = genericToJSON $ unPrefix "_r_"
663 toEncoding = genericToEncoding $ unPrefix "_r_"
664
665 makeLenses ''Repo
666
667 initRepo :: Monoid s => Repo s p
668 initRepo = Repo 1 mempty []
669
670 type NgramsRepo = Repo NgramsState NgramsStatePatch
671 type NgramsState = Map NgramsType (Map NodeId NgramsTableMap)
672 type NgramsStatePatch = PatchMap NgramsType (PatchMap NodeId NgramsTablePatch)
673
674 initMockRepo :: NgramsRepo
675 initMockRepo = Repo 1 s []
676 where
677 s = Map.singleton Ngrams.NgramsTerms
678 $ Map.singleton 47254
679 $ Map.fromList
680 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
681
682 data RepoEnv = RepoEnv
683 { _renv_var :: !(MVar NgramsRepo)
684 , _renv_saver :: !(IO ())
685 , _renv_lock :: !FileLock
686 }
687 deriving (Generic)
688
689 makeLenses ''RepoEnv
690
691 class HasRepoVar env where
692 repoVar :: Getter env (MVar NgramsRepo)
693
694 instance HasRepoVar (MVar NgramsRepo) where
695 repoVar = identity
696
697 class HasRepoSaver env where
698 repoSaver :: Getter env (IO ())
699
700 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
701 repoEnv :: Getter env RepoEnv
702
703 instance HasRepo RepoEnv where
704 repoEnv = identity
705
706 instance HasRepoVar RepoEnv where
707 repoVar = renv_var
708
709 instance HasRepoSaver RepoEnv where
710 repoSaver = renv_saver
711
712 type RepoCmdM env err m =
713 ( MonadReader env m
714 , MonadError err m
715 , MonadIO m
716 , HasRepo env
717 )
718 ------------------------------------------------------------------------
719
720 saveRepo :: ( MonadReader env m, MonadIO m, HasRepoSaver env )
721 => m ()
722 saveRepo = liftIO =<< view repoSaver
723
724 listTypeConflictResolution :: ListType -> ListType -> ListType
725 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
726
727 ngramsStatePatchConflictResolution
728 :: NgramsType -> NodeId -> NgramsTerm
729 -> ConflictResolutionNgramsPatch
730 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
731 = (const ours, ours)
732 -- undefined {- TODO think this through -}, listTypeConflictResolution)
733
734 -- Current state:
735 -- Insertions are not considered as patches,
736 -- they do not extend history,
737 -- they do not bump version.
738 insertNewOnly :: a -> Maybe b -> a
739 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
740 -- TODO error handling
741
742 something :: Monoid a => Maybe a -> a
743 something Nothing = mempty
744 something (Just a) = a
745
746 {- unused
747 -- TODO refactor with putListNgrams
748 copyListNgrams :: RepoCmdM env err m
749 => NodeId -> NodeId -> NgramsType
750 -> m ()
751 copyListNgrams srcListId dstListId ngramsType = do
752 var <- view repoVar
753 liftIO $ modifyMVar_ var $
754 pure . (r_state . at ngramsType %~ (Just . f . something))
755 saveRepo
756 where
757 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
758 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
759
760 -- TODO refactor with putListNgrams
761 -- The list must be non-empty!
762 -- The added ngrams must be non-existent!
763 addListNgrams :: RepoCmdM env err m
764 => NodeId -> NgramsType
765 -> [NgramsElement] -> m ()
766 addListNgrams listId ngramsType nes = do
767 var <- view repoVar
768 liftIO $ modifyMVar_ var $
769 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
770 saveRepo
771 where
772 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
773 -}
774
775 putListNgrams :: RepoCmdM env err m
776 => NodeId -> NgramsType
777 -> [NgramsElement] -> m ()
778 putListNgrams _ _ [] = pure ()
779 putListNgrams listId ngramsType nes = do
780 -- printDebug "putListNgrams" (length nes)
781 var <- view repoVar
782 liftIO $ modifyMVar_ var $
783 pure . (r_state . at ngramsType %~ (Just . (at listId %~ (Just . (m <>) . something)) . something))
784 saveRepo
785 where
786 m = Map.fromList $ (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) <$> nes
787
788 -- Apply the given patch to the DB and returns the patch to be applied on the
789 -- client.
790 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
791 => CorpusId -> TabType -> ListId
792 -> Versioned NgramsTablePatch
793 -> m (Versioned NgramsTablePatch)
794 tableNgramsPut _corpusId tabType listId (Versioned p_version p_table)
795 | p_table == mempty = do
796 let ngramsType = ngramsTypeFromTabType tabType
797
798 var <- view repoVar
799 r <- liftIO $ readMVar var
800
801 let
802 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
803 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
804
805 pure (Versioned (r ^. r_version) q_table)
806
807 | otherwise = do
808 let ngramsType = ngramsTypeFromTabType tabType
809 (p0, p0_validity) = PM.singleton listId p_table
810 (p, p_validity) = PM.singleton ngramsType p0
811
812 assertValid p0_validity
813 assertValid p_validity
814
815 var <- view repoVar
816 vq' <- liftIO $ modifyMVar var $ \r -> do
817 let
818 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
819 (p', q') = transformWith ngramsStatePatchConflictResolution p q
820 r' = r & r_version +~ 1
821 & r_state %~ act p'
822 & r_history %~ (p' :)
823 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
824 {-
825 -- Ideally we would like to check these properties. However:
826 -- * They should be checked only to debug the code. The client data
827 -- should be able to trigger these.
828 -- * What kind of error should they throw (we are in IO here)?
829 -- * Should we keep modifyMVar?
830 -- * Should we throw the validation in an Exception, catch it around
831 -- modifyMVar and throw it back as an Error?
832 assertValid $ transformable p q
833 assertValid $ applicable p' (r ^. r_state)
834 -}
835 pure (r', Versioned (r' ^. r_version) q'_table)
836
837 saveRepo
838 pure vq'
839
840 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
841 mergeNgramsElement _neOld neNew = neNew
842 {-
843 { _ne_list :: ListType
844 If we merge the parents/children we can potentially create cycles!
845 , _ne_parent :: Maybe NgramsTerm
846 , _ne_children :: MSet NgramsTerm
847 }
848 -}
849
850 getNgramsTableMap :: RepoCmdM env err m
851 => NodeId -> NgramsType -> m (Versioned NgramsTableMap)
852 getNgramsTableMap nodeId ngramsType = do
853 v <- view repoVar
854 repo <- liftIO $ readMVar v
855 pure $ Versioned (repo ^. r_version)
856 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
857
858 type MinSize = Int
859 type MaxSize = Int
860
861 -- | TODO Errors management
862 -- TODO: polymorphic for Annuaire or Corpus or ...
863 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
864 -- TODO: should take only one ListId
865
866
867
868
869 getTableNgrams :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
870 => NodeId -> TabType
871 -> ListId -> Limit -> Maybe Offset
872 -> Maybe ListType
873 -> Maybe MinSize -> Maybe MaxSize
874 -> (NgramsTerm -> Bool)
875 -> m (Versioned NgramsTable)
876 getTableNgrams nId tabType listId limit_ offset
877 listType minSize maxSize searchQuery = do
878
879 let
880 ngramsType = ngramsTypeFromTabType tabType
881 offset' = maybe 0 identity offset
882 listType' = maybe (const True) (==) listType
883 minSize' = maybe (const True) (<=) minSize
884 maxSize' = maybe (const True) (>=) maxSize
885
886 selected_node n = minSize' s
887 && maxSize' s
888 && searchQuery (n ^. ne_ngrams)
889 && listType' (n ^. ne_list)
890 where
891 s = n ^. ne_size
892
893 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
894
895 finalize tableMap = NgramsTable $ roots <> inners
896 where
897 rootOf ne = maybe ne (\r -> ngramsElementFromRepo (r, fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r)))
898 (ne ^. ne_root)
899 list = ngramsElementFromRepo <$> Map.toList tableMap
900 selected_nodes = list & take limit_ . drop offset' . filter selected_node
901 roots = rootOf <$> selected_nodes
902 rootsSet = Set.fromList (_ne_ngrams <$> roots)
903 inners = list & filter (selected_inner rootsSet)
904
905 -- lists <- catMaybes <$> listsWith userMaster
906 -- trace (show lists) $
907 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
908
909 table <- getNgramsTableMap listId ngramsType & mapped . v_data %~ finalize
910
911 lIds <- selectNodesWithUsername NodeList userMaster
912 occurrences <- getOccByNgramsOnlySafe nId (lIds <> [listId]) ngramsType (table ^.. v_data . _NgramsTable . each . ne_ngrams)
913
914 let
915 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
916
917 pure $ table & v_data . _NgramsTable . each %~ setOcc
918
919
920 -- APIs
921
922 -- TODO: find a better place for the code above, All APIs stay here
923 type QueryParamR = QueryParam' '[Required, Strict]
924
925 type TableNgramsApiGet = Summary " Table Ngrams API Get"
926 :> QueryParamR "ngramsType" TabType
927 :> QueryParamR "list" ListId
928 :> QueryParamR "limit" Limit
929 :> QueryParam "offset" Offset
930 :> QueryParam "listType" ListType
931 :> QueryParam "minTermSize" Int
932 :> QueryParam "maxTermSize" Int
933 :> QueryParam "search" Text
934 :> Get '[JSON] (Versioned NgramsTable)
935
936 type TableNgramsApiPut = Summary " Table Ngrams API Change"
937 :> QueryParamR "ngramsType" TabType
938 :> QueryParamR "list" ListId
939 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
940 :> Put '[JSON] (Versioned NgramsTablePatch)
941
942
943 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
944 => NodeId -> TabType
945 -> ListId -> Limit -> Maybe Offset
946 -> Maybe ListType
947 -> Maybe MinSize -> Maybe MaxSize
948 -> Maybe Text -- full text search
949 -> m (Versioned NgramsTable)
950 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize mt =
951 getTableNgrams nId tabType listId limit_ offset listType minSize maxSize searchQuery
952 where
953 searchQuery = maybe (const True) isInfixOf mt
954
955 -- | Text search is deactivated for now for ngrams by doc only
956 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
957 => DocId -> TabType
958 -> ListId -> Limit -> Maybe Offset
959 -> Maybe ListType
960 -> Maybe MinSize -> Maybe MaxSize
961 -> Maybe Text -- full text search
962 -> m (Versioned NgramsTable)
963 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize _mt = do
964 ns <- selectNodesWithUsername NodeList userMaster
965 let ngramsType = ngramsTypeFromTabType tabType
966 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
967 let searchQuery = flip S.member (S.fromList ngs)
968 getTableNgrams dId tabType listId limit_ offset listType minSize maxSize searchQuery
969
970
971
972
973 --{-
974 -- TODO Doc Table Ngrams API
975 type ApiNgramsTableDoc = TableNgramsApiGet
976 -- :<|> TableNgramsApiPut
977 -- :<|> TableNgramsApiPost
978
979 apiNgramsTableDoc :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
980 => DocId -> TabType
981 -> ListId -> Limit -> Maybe Offset
982 -> Maybe ListType
983 -> Maybe MinSize -> Maybe MaxSize
984 -> Maybe Text -- full text search
985 -> m (Versioned NgramsTable)
986 {- TODO
987 --apiDocNgramsTable :: ApiDocNgramsTable
988 --apiDocNgramsTable :: ApiDocNgramsTable
989 --apiDocNgramsTable = getTableNgramsDoc
990 :<|> tableNgramsPut
991 :<|> tableNgramsPost
992 -- > add new ngrams to the repo (TODO NP)
993 -- > add new ngrams in database (TODO AD)
994 -- > index all the corpus accordingly (TODO AD)
995 --}
996 apiNgramsTableDoc = getTableNgramsDoc
997