{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ConstraintKinds #-}
-{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds #-}
-{-# LANGUAGE DeriveGeneric #-}
-{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE InstanceSigs #-}
-{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
( FlowCmdM
+ , getDataText
+ , flowDataText
+
, flowCorpusFile
, flowCorpus
- , flowCorpusSearchInDatabase
+ , flowAnnuaire
+
, getOrMkRoot
, getOrMk_RootWithCorpus
- , flowAnnuaire
+ , TermType(..)
+ , DataOrigin(..)
+ , allDataOrigins
+
+ , do_api
)
where
-import Control.Lens ((^.), view, _Just)
+import Control.Lens ((^.), view, _Just, makeLenses)
+import Data.Aeson.TH (deriveJSON)
import Data.Either
import Data.List (concat)
+import qualified Data.Map as Map
import Data.Map (Map, lookup)
import Data.Maybe (Maybe(..), catMaybes)
import Data.Monoid
-import Data.Text (Text, splitOn, intercalate)
+import Data.Swagger
+import Data.Text (splitOn, intercalate)
import Data.Traversable (traverse)
import Data.Tuple.Extra (first, second)
-import Debug.Trace (trace)
+import GHC.Generics (Generic)
+import System.FilePath (FilePath)
+
import Gargantext.Core (Lang(..))
import Gargantext.Core.Flow.Types
-import Gargantext.Core.Types (NodePoly(..), Terms(..))
+import Gargantext.Core.Types (Terms(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main
-import Gargantext.Database.Config (userMaster, corpusMasterName)
-import Gargantext.Database.Flow.List
-import Gargantext.Database.Flow.Types
-import Gargantext.Database.Flow.Utils (insertDocNgrams)
-import Gargantext.Database.Node.Contact -- (HyperdataContact(..), ContactWho(..))
-import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
-import Gargantext.Database.Root (getRoot)
-import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
-import Gargantext.Database.Schema.Node -- (mkRoot, mkCorpus, getOrMkList, mkGraph, {-mkPhylo,-} mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
-import Gargantext.Database.Schema.NodeNgrams (listInsertDb , getCgramsId)
-import Gargantext.Database.Schema.NodeNodeNgrams2 -- (NodeNodeNgrams2, insertNodeNodeNgrams2)
-import Gargantext.Database.Schema.User (getUserId)
-import Gargantext.Database.TextSearch (searchInDatabase)
-import Gargantext.Database.Types.Errors (HasNodeError(..), NodeError(..), nodeError)
-import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
-import Gargantext.Database.Utils (Cmd)
+import Gargantext.Database.Action.Flow.List
+import Gargantext.Database.Action.Flow.Types
+import Gargantext.Database.Action.Flow.Utils (insertDocNgrams)
+import Gargantext.Database.Query.Table.Node
+import Gargantext.Database.Query.Table.Node.Contact -- (HyperdataContact(..), ContactWho(..))
+import Gargantext.Database.Query.Table.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
+import Gargantext.Database.Query.Tree.Root (getOrMkRoot, getOrMk_RootWithCorpus)
+import Gargantext.Database.Action.Search (searchInDatabase)
+import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
+import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
+import Gargantext.Database.Admin.Types.Hyperdata
+import Gargantext.Database.Admin.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
+import Gargantext.Database.Prelude
+import Gargantext.Database.Query.Table.Ngrams
+import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
+import Gargantext.Database.Query.Table.NodeNodeNgrams2
import Gargantext.Ext.IMT (toSchoolName)
+import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
+import Gargantext.Text
import Gargantext.Prelude
-import Gargantext.Prelude.Utils hiding (sha)
import Gargantext.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Text.List (buildNgramsLists,StopSize(..))
-import Gargantext.Text.Terms (TermType(..), tt_lang, extractTerms, uniText)
-import Gargantext.Text.Terms.Eleve (buildTries, toToken)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
-import Prelude (String)
-import System.FilePath (FilePath)
-import qualified Data.List as List
-import qualified Data.Map as Map
-import qualified Data.Text as Text
-import qualified Gargantext.Database.Node.Document.Add as Doc (add)
-import qualified Gargantext.Text.Corpus.API.Isidore as Isidore
-import qualified Gargantext.Text.Corpus.Parsers.GrandDebat as GD
+import Gargantext.Text.Terms
+import qualified Gargantext.Database.Query.Table.Node.Document.Add as Doc (add)
+import qualified Gargantext.Text.Corpus.API as API
------------------------------------------------------------------------
+-- TODO use internal with API name (could be old data)
+data DataOrigin = InternalOrigin { _do_api :: API.ExternalAPIs }
+ | ExternalOrigin { _do_api :: API.ExternalAPIs }
+ -- TODO Web
+ deriving (Generic, Eq)
+
+makeLenses ''DataOrigin
+deriveJSON (unPrefix "_do_") ''DataOrigin
+instance ToSchema DataOrigin where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_do_")
+
+allDataOrigins :: [DataOrigin]
+allDataOrigins = map InternalOrigin API.externalAPIs
+ <> map ExternalOrigin API.externalAPIs
+
+---------------
+
+data DataText = DataOld ![NodeId]
+ | DataNew ![[HyperdataDocument]]
+
+
+-- TODO use the split parameter in config file
+getDataText :: FlowCmdM env err m
+ => DataOrigin
+ -> TermType Lang
+ -> API.Query
+ -> Maybe API.Limit
+ -> m DataText
+getDataText (ExternalOrigin api) la q li = liftBase $ DataNew
+ <$> splitEvery 500
+ <$> API.get api (_tt_lang la) q li
+getDataText (InternalOrigin _) _la q _li = do
+ (_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus
+ (UserName userMaster)
+ (Left "")
+ (Nothing :: Maybe HyperdataCorpus)
+ ids <- map fst <$> searchInDatabase cId (stemIt q)
+ pure $ DataOld ids
-data ApiQuery = ApiIsidoreQuery Text | ApiIsidoreAuth Text
--- | APIs
--- TODO instances
-getDataApi :: Lang
- -> Maybe Limit
- -> ApiQuery
- -> IO [HyperdataDocument]
-getDataApi lang limit (ApiIsidoreQuery q) = Isidore.get lang limit (Just q) Nothing
-getDataApi lang limit (ApiIsidoreAuth q) = Isidore.get lang limit Nothing (Just q)
-
-
--- UNUSED
-_flowCorpusApi :: ( FlowCmdM env err m)
- => User -> Either CorpusName [CorpusId]
- -> TermType Lang
- -> Maybe Limit
- -> ApiQuery
- -> m CorpusId
-_flowCorpusApi u n tt l q = do
- docs <- liftBase $ splitEvery 500 <$> getDataApi (_tt_lang tt) l q
- flowCorpus u n tt docs
+-------------------------------------------------------------------------------
+flowDataText :: FlowCmdM env err m
+ => User
+ -> DataText
+ -> TermType Lang
+ -> CorpusId
+ -> m CorpusId
+flowDataText u (DataOld ids) tt cid = flowCorpusUser (_tt_lang tt) u (Right [cid]) corpusType ids
+ where
+ corpusType = (Nothing :: Maybe HyperdataCorpus)
+flowDataText u (DataNew txt) tt cid = flowCorpus u (Right [cid]) tt txt
------------------------------------------------------------------------
-
+-- TODO use proxy
flowAnnuaire :: FlowCmdM env err m
=> User
-> Either CorpusName [CorpusId]
docs <- liftBase $ (( splitEvery 500 <$> deserialiseImtUsersFromFile filePath) :: IO [[HyperdataContact]])
flow (Nothing :: Maybe HyperdataAnnuaire) u n l docs
--- UNUSED
-_flowCorpusDebat :: FlowCmdM env err m
- => User -> Either CorpusName [CorpusId]
- -> Limit -> FilePath
- -> m CorpusId
-_flowCorpusDebat u n l fp = do
- docs <- liftBase ( splitEvery 500
- <$> take l
- <$> readFile' fp
- :: IO [[GD.GrandDebatReference ]]
- )
- flowCorpus u n (Multi FR) (map (map toHyperdataDocument) docs)
-
+------------------------------------------------------------------------
flowCorpusFile :: FlowCmdM env err m
- => User -> Either CorpusName [CorpusId]
+ => User
+ -> Either CorpusName [CorpusId]
-> Limit -- Limit the number of docs (for dev purpose)
-> TermType Lang -> FileFormat -> FilePath
-> m CorpusId
)
flowCorpus u n la (map (map toHyperdataDocument) docs)
--- TODO query with complex query
-flowCorpusSearchInDatabase :: FlowCmdM env err m
- => User
- -> Lang
- -> Text
- -> m CorpusId
-flowCorpusSearchInDatabase u la q = do
- (_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus
- (UserName userMaster)
- (Left "")
- (Nothing :: Maybe HyperdataCorpus)
- ids <- map fst <$> searchInDatabase cId (stemIt q)
- flowCorpusUser la u (Left q) (Nothing :: Maybe HyperdataCorpus) ids
-
-
--- UNUSED
-_flowCorpusSearchInDatabaseApi :: FlowCmdM env err m
- => User
- -> Lang
- -> Text
- -> m CorpusId
-_flowCorpusSearchInDatabaseApi u la q = do
- (_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus
- (UserName userMaster)
- (Left "")
- (Nothing :: Maybe HyperdataCorpus)
- ids <- map fst <$> searchInDatabase cId (stemIt q)
- flowCorpusUser la u (Left q) (Nothing :: Maybe HyperdataCorpus) ids
-
------------------------------------------------------------------------
-- | TODO improve the needed type to create/update a corpus
-{- UNUSED
-data UserInfo = Username Text
- | UserId NodeId
-data CorpusInfo = CorpusName Lang Text
- | CorpusId Lang NodeId
--}
+-- (For now, Either is enough)
+flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
+ => User
+ -> Either CorpusName [CorpusId]
+ -> TermType Lang
+ -> [[a]]
+ -> m CorpusId
+flowCorpus = flow (Nothing :: Maybe HyperdataCorpus)
+
flow :: (FlowCmdM env err m, FlowCorpus a, MkCorpus c)
=> Maybe c
-> [[a]]
-> m CorpusId
flow c u cn la docs = do
- ids <- traverse (insertMasterDocs c la ) docs
+ ids <- traverse (insertMasterDocs c la) docs
flowCorpusUser (la ^. tt_lang) u cn c (concat ids)
-flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
- => User
- -> Either CorpusName [CorpusId]
- -> TermType Lang
- -> [[a]]
- -> m CorpusId
-flowCorpus = flow (Nothing :: Maybe HyperdataCorpus)
-
------------------------------------------------------------------------
flowCorpusUser :: (FlowCmdM env err m, MkCorpus c)
=> Lang
-- insertDocNgrams
_return <- insertNodeNodeNgrams2
$ catMaybes [ NodeNodeNgrams2 <$> Just nId
- <*> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms)
+ <*> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'')
<*> Just (fromIntegral w :: Double)
- | (terms, mapNgramsTypes) <- Map.toList maps
+ | (terms'', mapNgramsTypes) <- Map.toList maps
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, w) <- Map.toList mapNodeIdWeight
]
pure ids'
-withLang :: HasText a => TermType Lang
- -> [DocumentWithId a]
- -> TermType Lang
-withLang (Unsupervised l n s m) ns = Unsupervised l n s m'
- where
- m' = case m of
- Nothing -> trace ("buildTries here" :: String)
- $ Just
- $ buildTries n ( fmap toToken $ uniText
- $ Text.intercalate " . "
- $ List.concat
- $ map hasText ns
- )
- just_m -> just_m
-withLang l _ = l
-
-
-
-type CorpusName = Text
-
-getOrMkRoot :: (HasNodeError err)
- => User
- -> Cmd err (UserId, RootId)
-getOrMkRoot user = do
- userId <- getUserId user
-
- rootId' <- map _node_id <$> getRoot user
-
- rootId'' <- case rootId' of
- [] -> mkRoot user
- n -> case length n >= 2 of
- True -> nodeError ManyNodeUsers
- False -> pure rootId'
-
- rootId <- maybe (nodeError NoRootFound) pure (head rootId'')
- pure (userId, rootId)
-
-
-getOrMk_RootWithCorpus :: (HasNodeError err, MkCorpus a)
- => User
- -> Either CorpusName [CorpusId]
- -> Maybe a
- -> Cmd err (UserId, RootId, CorpusId)
-getOrMk_RootWithCorpus user cName c = do
- (userId, rootId) <- getOrMkRoot user
- corpusId'' <- if user == UserName userMaster
- then do
- ns <- getCorporaWithParentId rootId
- pure $ map _node_id ns
- else
- pure $ fromRight [] cName
-
- corpusId' <- if corpusId'' /= []
- then pure corpusId''
- else do
- c' <- mk (Just $ fromLeft "Default" cName) c rootId userId
- _tId <- case head c' of
- Nothing -> pure [0]
- Just c'' -> mkNode NodeTexts c'' userId
- pure c'
-
- corpusId <- maybe (nodeError NoCorpusFound) pure (head corpusId')
- pure (userId, rootId, corpusId)
+------------------------------------------------------------------------
+
------------------------------------------------------------------------
where
hasText = undefined
+------------------------------------------------------------------------
+------------------------------------------------------------------------
+
+documentIdWithNgrams :: HasNodeError err
+ => (a
+ -> Cmd err (Map Ngrams (Map NgramsType Int)))
+ -> [DocumentWithId a]
+ -> Cmd err [DocumentIdWithNgrams a]
+documentIdWithNgrams f = traverse toDocumentIdWithNgrams
+ where
+ toDocumentIdWithNgrams d = do
+ e <- f $ documentData d
+ pure $ DocumentIdWithNgrams d e
+
+
+------------------------------------------------------------------------
+
+
instance ExtractNgramsT HyperdataContact
where
extractNgramsT l hc = filterNgramsT 255 <$> extract l hc
<> [(a', Map.singleton Authors 1) | a' <- authors ]
<> [(t', Map.singleton NgramsTerms 1) | t' <- terms' ]
-filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int)
- -> Map Ngrams (Map NgramsType Int)
-filterNgramsT s ms = Map.fromList $ map (\a -> filter' s a) $ Map.toList ms
- where
- filter' s' (ng@(Ngrams t n),y) = case (Text.length t) < s' of
- True -> (ng,y)
- False -> (Ngrams (Text.take s' t) n , y)
-
-
-documentIdWithNgrams :: HasNodeError err
- => (a
- -> Cmd err (Map Ngrams (Map NgramsType Int)))
- -> [DocumentWithId a]
- -> Cmd err [DocumentIdWithNgrams a]
-documentIdWithNgrams f = traverse toDocumentIdWithNgrams
- where
- toDocumentIdWithNgrams d = do
- e <- f $ documentData d
- pure $ DocumentIdWithNgrams d e