import Data.Traversable
import GHC.Base (String)
import GHC.Generics (Generic)
-import qualified Data.List as List
-import qualified Data.Map as Map
-import qualified Data.Set as Set
-import qualified Data.Text as Text
-
+import qualified Data.List as List
+import qualified Data.Set as Set
+import qualified Data.Text as Text
+import qualified Data.HashMap.Strict as HashMap
import Gargantext.Core
-import Gargantext.Core.Flow.Types
import Gargantext.Core.Text (sentences, HasText(..))
import Gargantext.Core.Text.Terms.Eleve (mainEleveWith, Tries, Token, buildTries, toToken)
import Gargantext.Core.Text.Terms.Mono (monoTerms)
import Gargantext.Core.Types
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Ngrams (insertNgrams)
-import Gargantext.Database.Query.Table.NgramsPostag (NgramsPostag(..), insertNgramsPostag)
+import Gargantext.Database.Query.Table.NgramsPostag (NgramsPostag(..), insertNgramsPostag, np_form, np_lem)
import Gargantext.Database.Schema.Ngrams (Ngrams(..), NgramsType(..), ngramsTerms, text2ngrams, NgramsId)
import Gargantext.Prelude
-import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
data TermType lang
= Mono { _tt_lang :: !lang }
where
m' = case m of
Nothing -> -- trace ("buildTries here" :: String)
- Just $ buildTries n ( fmap toToken
+ Just $ buildTries n $ fmap toToken
$ uniText
$ Text.intercalate " . "
$ List.concat
$ map hasText ns
- )
just_m -> just_m
withLang l _ = l
------------------------------------------------------------------------
data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
| EnrichedNgrams { unEnrichedNgrams :: NgramsPostag }
- deriving (Eq, Ord, Generic)
+ deriving (Eq, Ord, Generic, Show)
instance Hashable ExtractedNgrams
-> h
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
------------------------------------------------------------------------
-cleanExtractedNgrams :: Int -> ExtractedNgrams -> ExtractedNgrams
-cleanExtractedNgrams s (SimpleNgrams ng)
- | Text.length (ng ^. ngramsTerms) < s = SimpleNgrams ng
- | otherwise = SimpleNgrams $ text2ngrams (Text.take s (ng ^. ngramsTerms))
-cleanExtractedNgrams s _ = undefined
+enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
+enrichedTerms l pa po (Terms ng1 ng2) =
+ NgramsPostag l pa po form lem
+ where
+ form = text2ngrams $ Text.intercalate " " ng1
+ lem = text2ngrams $ Text.intercalate " " $ Set.toList ng2
-extracted2ngrams :: ExtractedNgrams -> Ngrams
-extracted2ngrams (SimpleNgrams ng) = ng
-extracted2ngrams _ = undefined
+------------------------------------------------------------------------
+cleanNgrams :: Int -> Ngrams -> Ngrams
+cleanNgrams s ng
+ | Text.length (ng ^. ngramsTerms) < s = ng
+ | otherwise = text2ngrams (Text.take s (ng ^. ngramsTerms))
+cleanExtractedNgrams :: Int -> ExtractedNgrams -> ExtractedNgrams
+cleanExtractedNgrams s (SimpleNgrams ng) = SimpleNgrams $ (cleanNgrams s) ng
+cleanExtractedNgrams s (EnrichedNgrams ng) = EnrichedNgrams $ over np_form (cleanNgrams s)
+ $ over np_lem (cleanNgrams s) ng
-isSimpleNgrams :: ExtractedNgrams -> Bool
-isSimpleNgrams (SimpleNgrams _) = True
-isSimpleNgrams _ = False
+extracted2ngrams :: ExtractedNgrams -> Ngrams
+extracted2ngrams (SimpleNgrams ng) = ng
+extracted2ngrams (EnrichedNgrams ng) = view np_form ng
+---------------------------
insertExtractedNgrams :: [ ExtractedNgrams ] -> Cmd err (HashMap Text NgramsId)
insertExtractedNgrams ngs = do
let (s, e) = List.partition isSimpleNgrams ngs
m1 <- insertNgrams (map unSimpleNgrams s)
+ --printDebug "others" m1
+
m2 <- insertNgramsPostag (map unEnrichedNgrams e)
- pure $ m1 <> m2
+ --printDebug "terms" m2
+
+ let result = HashMap.union m1 m2
+ pure result
+isSimpleNgrams :: ExtractedNgrams -> Bool
+isSimpleNgrams (SimpleNgrams _) = True
+isSimpleNgrams _ = False
------------------------------------------------------------------------
-- | Terms from Text