, flowCorpus
, flowAnnuaire
, insertMasterDocs
+ , saveDocNgramsWith
, getOrMkRoot
, getOrMk_RootWithCorpus
, allDataOrigins
, do_api
+ , indexAllDocumentsWithPosTag
)
where
import Gargantext.Core (Lang(..), PosTagAlgo(..))
import Gargantext.Core.Ext.IMT (toSchoolName)
-import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile)
+import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
import Gargantext.Core.Flow.Types
import Gargantext.Core.Text
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
import Gargantext.Database.Query.Table.NodeNodeNgrams2
import Gargantext.Database.Query.Tree.Root (getOrMkRoot, getOrMk_RootWithCorpus)
-import Gargantext.Database.Schema.Node (NodePoly(..))
+import Gargantext.Database.Schema.Node (NodePoly(..), node_id)
import Gargantext.Database.Types
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash)
import qualified Gargantext.Core.Text.Corpus.API as API
import qualified Gargantext.Database.Query.Table.Node.Document.Add as Doc (add)
+------------------------------------------------------------------------
+-- Impots for upgrade function
+import Gargantext.Database.Query.Tree.Root (getRootId)
+import Gargantext.Database.Query.Tree (findNodesId)
+import qualified Data.List as List
------------------------------------------------------------------------
-- TODO use internal with API name (could be old data)
data DataOrigin = InternalOrigin { _do_api :: API.ExternalAPIs }
-> FilePath
-> m AnnuaireId
flowAnnuaire u n l filePath = do
- docs <- liftBase $ (( splitEvery 500 <$> deserialiseImtUsersFromFile filePath) :: IO [[HyperdataContact]])
+ docs <- liftBase $ (( splitEvery 500 <$> readFile_Annuaire filePath) :: IO [[HyperdataContact]])
flow (Nothing :: Maybe HyperdataAnnuaire) u n l docs
------------------------------------------------------------------------
-- printDebug "Node Text Ids:" tId
-- User List Flow
- (masterUserId, _masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus (UserName userMaster) (Left "") ctype
+ (masterUserId, _masterRootId, masterCorpusId)
+ <- getOrMk_RootWithCorpus (UserName userMaster) (Left "") ctype
--let gp = (GroupParams l 2 3 (StopSize 3))
let gp = GroupWithPosTag l CoreNLP HashMap.empty
(extractNgramsT $ withLang lang documentsWithId)
documentsWithId
+ lId <- getOrMkList masterCorpusId masterUserId
+ _ <- saveDocNgramsWith lId mapNgramsDocs'
+
+ -- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
+ pure ids'
+
+saveDocNgramsWith :: ( FlowCmdM env err m)
+ => ListId
+ -> HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
+ -> m ()
+saveDocNgramsWith lId mapNgramsDocs' = do
terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'
let indexedNgrams = HashMap.mapKeys (indexNgrams terms2id) mapNgramsDocs
-- new
- lId <- getOrMkList masterCorpusId masterUserId
mapCgramsId <- listInsertDb lId toNodeNgramsW'
$ map (first _ngramsTerms . second Map.keys)
$ HashMap.toList mapNgramsDocs
+
-- insertDocNgrams
_return <- insertNodeNodeNgrams2
$ catMaybes [ NodeNodeNgrams2 <$> Just nId
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, w) <- Map.toList mapNodeIdWeight
]
-
- -- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
-- to be removed
_ <- insertDocNgrams lId indexedNgrams
- pure ids'
+
+ pure ()
+
------------------------------------------------------------------------
-- TODO Type NodeDocumentUnicised
hasText (Node _ _ _ _ _ _ _ h) = hasText h
+
+-- | TODO putelsewhere
+-- | Upgrade function
+-- Suppose all documents are English (this is the case actually)
+indexAllDocumentsWithPosTag :: FlowCmdM env err m => m ()
+indexAllDocumentsWithPosTag = do
+ rootId <- getRootId (UserName userMaster)
+ corpusIds <- findNodesId rootId [NodeCorpus]
+ docs <- List.concat <$> mapM getDocumentsWithParentId corpusIds
+
+ _ <- mapM extractInsert (splitEvery 1000 docs)
+
+ pure ()
+
+extractInsert :: FlowCmdM env err m => [Node HyperdataDocument] -> m ()
+extractInsert docs = do
+ let documentsWithId = map (\doc -> Indexed (doc ^. node_id) doc) docs
+
+ mapNgramsDocs' <- mapNodeIdNgrams
+ <$> documentIdWithNgrams
+ (extractNgramsT $ withLang (Multi EN) documentsWithId)
+ documentsWithId
+
+ _ <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
+
+ pure ()
+
+