import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.List (concat)
-import Data.Map (Map, lookup)
+import Data.Map.Strict (Map, lookup)
import Data.Maybe (catMaybes)
import Data.Monoid
import Data.Swagger
import System.FilePath (FilePath)
import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
-import qualified Data.Map as Map
+import qualified Data.Map.Strict as Map
import qualified Data.Conduit.List as CL
import qualified Data.Conduit as C
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..))
import Gargantext.Core (Lang(..), PosTagAlgo(..))
-import Gargantext.Core.Ext.IMT (toSchoolName)
+-- import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
import Gargantext.Core.Flow.Types
import Gargantext.Core.Text
-import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat, FileType)
+import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat, FileType, splitOn)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
-> Maybe FlowSocialListWith
-> (JobLog -> m ())
-> m CorpusId
-flowDataText u (DataOld ids) tt cid mfslw _ = flowCorpusUser (_tt_lang tt) u (Right [cid]) corpusType ids mfslw
+flowDataText u (DataOld ids) tt cid mfslw _ = do
+ (_userId, userCorpusId, listId) <- createNodes u (Right [cid]) corpusType
+ _ <- Doc.add userCorpusId ids
+ flowCorpusUser (_tt_lang tt) u userCorpusId listId corpusType mfslw
where
corpusType = (Nothing :: Maybe HyperdataCorpus)
flowDataText u (DataNew (mLen, txtC)) tt cid mfslw logStatus =
-> (JobLog -> m ())
-> m CorpusId
flow c u cn la mfslw (mLength, docsC) logStatus = do
+ (_userId, userCorpusId, listId) <- createNodes u cn c
-- TODO if public insertMasterDocs else insertUserDocs
- ids <- runConduit $ zipSources (yieldMany [1..]) docsC
- .| CList.chunksOf 100
- .| mapMC insertDocs'
- .| CList.concat
- .| sinkList
+ _ <- runConduit $ zipSources (yieldMany [1..]) docsC
+ .| CList.chunksOf 100
+ .| mapMC insertDocs'
+ .| mapM_C (\ids' -> do
+ _ <- Doc.add userCorpusId ids'
+ pure ())
+ .| sinkList
+
+ _ <- flowCorpusUser (la ^. tt_lang) u userCorpusId listId c mfslw
+
-- ids <- traverse (\(idx, doc) -> do
-- id <- insertMasterDocs c la doc
-- logStatus JobLog { _scst_succeeded = Just $ 1 + idx
-- }
-- pure id
-- ) (zip [1..] docs)
- flowCorpusUser (la ^. tt_lang) u cn c ids mfslw
+ --printDebug "[flow] calling flowCorpusUser" (0 :: Int)
+ pure userCorpusId
+ --flowCorpusUser (la ^. tt_lang) u cn c ids mfslw
where
insertDocs' :: [(Integer, a)] -> m [NodeId]
------------------------------------------------------------------------
-flowCorpusUser :: ( FlowCmdM env err m
- , MkCorpus c
- )
- => Lang
- -> User
- -> Either CorpusName [CorpusId]
- -> Maybe c
- -> [ContextId]
- -> Maybe FlowSocialListWith
- -> m CorpusId
-flowCorpusUser l user corpusName ctype ids mfslw = do
+createNodes :: ( FlowCmdM env err m
+ , MkCorpus c
+ )
+ => User
+ -> Either CorpusName [CorpusId]
+ -> Maybe c
+ -> m (UserId, CorpusId, ListId)
+createNodes user corpusName ctype = do
-- User Flow
(userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus user corpusName ctype
-- NodeTexts is first
-- NodeList is second
listId <- getOrMkList userCorpusId userId
- -- _cooc <- insertDefaultNode NodeListCooc listId userId
- -- TODO: check if present already, ignore
- _ <- Doc.add userCorpusId ids
- -- printDebug "Node Text Ids:" tId
+ -- User Graph Flow
+ _ <- insertDefaultNodeIfNotExists NodeGraph userCorpusId userId
+ _ <- insertDefaultNodeIfNotExists NodeDashboard userCorpusId userId
+
+ pure (userId, userCorpusId, listId)
+
+flowCorpusUser :: ( FlowCmdM env err m
+ , MkCorpus c
+ )
+ => Lang
+ -> User
+ -> CorpusId
+ -> ListId
+ -> Maybe c
+ -> Maybe FlowSocialListWith
+ -> m CorpusId
+flowCorpusUser l user userCorpusId listId ctype mfslw = do
-- User List Flow
(masterUserId, _masterRootId, masterCorpusId)
<- getOrMk_RootWithCorpus (UserName userMaster) (Left "") ctype
pure ()
-- _ <- insertOccsUpdates userCorpusId mastListId
-- printDebug "userListId" userListId
- -- User Graph Flow
- _ <- insertDefaultNodeIfNotExists NodeGraph userCorpusId userId
- _ <- insertDefaultNodeIfNotExists NodeDashboard userCorpusId userId
--_ <- mkPhylo userCorpusId userId
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
$ _hd_source doc
institutes = map text2ngrams
- $ maybe ["Nothing"] (map toSchoolName . (T.splitOn ", "))
+ $ maybe ["Nothing"] (splitOn Institutes (doc^. hd_bdd))
$ _hd_institutes doc
authors = map text2ngrams
- $ maybe ["Nothing"] (T.splitOn ", ")
+ $ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd))
$ _hd_authors doc
+
termsWithCounts' <- map (\(t, cnt) -> (enrichedTerms (lang' ^. tt_lang) CoreNLP NP t, cnt))
<$> concat
<$> liftBase (extractTerms lang' $ hasText doc)