{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ConstraintKinds #-}
-{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds #-}
-{-# LANGUAGE DeriveGeneric #-}
-{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE InstanceSigs #-}
-{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
import Data.Aeson.TH (deriveJSON)
import Data.Either
import Data.List (concat)
+import qualified Data.Map as Map
import Data.Map (Map, lookup)
import Data.Maybe (Maybe(..), catMaybes)
import Data.Monoid
import Data.Text (splitOn, intercalate)
import Data.Traversable (traverse)
import Data.Tuple.Extra (first, second)
-import Debug.Trace (trace)
+import GHC.Generics (Generic)
+import System.FilePath (FilePath)
+
import Gargantext.Core (Lang(..))
import Gargantext.Core.Flow.Types
import Gargantext.Core.Types (Terms(..))
import Gargantext.Database.Action.Flow.List
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Flow.Utils (insertDocNgrams)
-import Gargantext.Database.Action.Query.Node
-import Gargantext.Database.Action.Query.Node.Contact -- (HyperdataContact(..), ContactWho(..))
-import Gargantext.Database.Action.Query.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
-import Gargantext.Database.Action.Query.Tree.Root (getOrMkRoot, getOrMk_RootWithCorpus)
+import Gargantext.Database.Query.Table.Node
+import Gargantext.Database.Query.Table.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
+import Gargantext.Database.Query.Tree.Root (getOrMkRoot, getOrMk_RootWithCorpus)
import Gargantext.Database.Action.Search (searchInDatabase)
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
-import Gargantext.Database.Admin.Types.Errors (HasNodeError(..))
+import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
+import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
-import Gargantext.Database.Admin.Utils (Cmd)
-import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
-import Gargantext.Database.Schema.NodeNgrams (listInsertDb , getCgramsId)
-import Gargantext.Database.Schema.NodeNodeNgrams2 -- (NodeNodeNgrams2, insertNodeNodeNgrams2)
+import Gargantext.Database.Prelude
+import Gargantext.Database.Query.Table.Ngrams
+import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
+import Gargantext.Database.Query.Table.NodeNodeNgrams2
import Gargantext.Ext.IMT (toSchoolName)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
+import Gargantext.Text
import Gargantext.Prelude
import Gargantext.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Text.List (buildNgramsLists,StopSize(..))
-import Gargantext.Text.Terms (TermType(..), tt_lang, extractTerms, uniText)
-import Gargantext.Text.Terms.Eleve (buildTries, toToken)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
-import GHC.Generics (Generic)
-import Prelude (String)
-import System.FilePath (FilePath)
-import qualified Data.List as List
-import qualified Data.Map as Map
-import qualified Data.Text as Text
-import qualified Gargantext.Database.Action.Query.Node.Document.Add as Doc (add)
+import Gargantext.Text.Terms
+import qualified Gargantext.Database.Query.Table.Node.Document.Add as Doc (add)
import qualified Gargantext.Text.Corpus.API as API
------------------------------------------------------------------------
-- User Flow
(userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus user corpusName ctype
listId <- getOrMkList userCorpusId userId
- _cooc <- mkNode NodeListCooc listId userId
+ _cooc <- insertDefaultNode NodeListCooc listId userId
-- TODO: check if present already, ignore
_ <- Doc.add userCorpusId ids
- _tId <- mkNode NodeTexts userCorpusId userId
+ _tId <- insertDefaultNode NodeTexts userCorpusId userId
-- printDebug "Node Text Id" tId
-- User List Flow
-- _ <- insertOccsUpdates userCorpusId mastListId
-- printDebug "userListId" userListId
-- User Graph Flow
- _ <- mkDashboard userCorpusId userId
- _ <- mkGraph userCorpusId userId
+ _ <- insertDefaultNode NodeDashboard userCorpusId userId
+ _ <- insertDefaultNode NodeGraph userCorpusId userId
--_ <- mkPhylo userCorpusId userId
-- Annuaire Flow
-- insertDocNgrams
_return <- insertNodeNodeNgrams2
$ catMaybes [ NodeNodeNgrams2 <$> Just nId
- <*> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms)
+ <*> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'')
<*> Just (fromIntegral w :: Double)
- | (terms, mapNgramsTypes) <- Map.toList maps
+ | (terms'', mapNgramsTypes) <- Map.toList maps
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, w) <- Map.toList mapNodeIdWeight
]
_ <- Doc.add masterCorpusId ids'
- _cooc <- mkNode NodeListCooc lId masterUserId
+ _cooc <- insertDefaultNode NodeListCooc lId masterUserId
-- to be removed
_ <- insertDocNgrams lId indexedNgrams
pure ids'
-withLang :: HasText a
- => TermType Lang
- -> [DocumentWithId a]
- -> TermType Lang
-withLang (Unsupervised l n s m) ns = Unsupervised l n s m'
- where
- m' = case m of
- Nothing -> trace ("buildTries here" :: String)
- $ Just
- $ buildTries n ( fmap toToken $ uniText
- $ Text.intercalate " . "
- $ List.concat
- $ map hasText ns
- )
- just_m -> just_m
-withLang l _ = l
+------------------------------------------------------------------------
+
------------------------------------------------------------------------
where
hasText = undefined
+------------------------------------------------------------------------
+------------------------------------------------------------------------
+
+documentIdWithNgrams :: HasNodeError err
+ => (a
+ -> Cmd err (Map Ngrams (Map NgramsType Int)))
+ -> [DocumentWithId a]
+ -> Cmd err [DocumentIdWithNgrams a]
+documentIdWithNgrams f = traverse toDocumentIdWithNgrams
+ where
+ toDocumentIdWithNgrams d = do
+ e <- f $ documentData d
+ pure $ DocumentIdWithNgrams d e
+
+
+------------------------------------------------------------------------
+
+
instance ExtractNgramsT HyperdataContact
where
extractNgramsT l hc = filterNgramsT 255 <$> extract l hc
instance HasText HyperdataDocument
where
- hasText h = catMaybes [ _hyperdataDocument_title h
- , _hyperdataDocument_abstract h
+ hasText h = catMaybes [ _hd_title h
+ , _hd_abstract h
]
instance ExtractNgramsT HyperdataDocument
extractNgramsT' lang' doc = do
let source = text2ngrams
$ maybe "Nothing" identity
- $ _hyperdataDocument_source doc
+ $ _hd_source doc
institutes = map text2ngrams
$ maybe ["Nothing"] (map toSchoolName . (splitOn ", "))
- $ _hyperdataDocument_institutes doc
+ $ _hd_institutes doc
authors = map text2ngrams
$ maybe ["Nothing"] (splitOn ", ")
- $ _hyperdataDocument_authors doc
+ $ _hd_authors doc
terms' <- map text2ngrams
<$> map (intercalate " " . _terms_label)
<> [(a', Map.singleton Authors 1) | a' <- authors ]
<> [(t', Map.singleton NgramsTerms 1) | t' <- terms' ]
-filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int)
- -> Map Ngrams (Map NgramsType Int)
-filterNgramsT s ms = Map.fromList $ map (\a -> filter' s a) $ Map.toList ms
- where
- filter' s' (ng@(Ngrams t n),y) = case (Text.length t) < s' of
- True -> (ng,y)
- False -> (Ngrams (Text.take s' t) n , y)
-
-
-documentIdWithNgrams :: HasNodeError err
- => (a
- -> Cmd err (Map Ngrams (Map NgramsType Int)))
- -> [DocumentWithId a]
- -> Cmd err [DocumentIdWithNgrams a]
-documentIdWithNgrams f = traverse toDocumentIdWithNgrams
- where
- toDocumentIdWithNgrams d = do
- e <- f $ documentData d
- pure $ DocumentIdWithNgrams d e