-}
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
module Gargantext.Text.Terms.WithList where
import Data.Text (Text)
import qualified Data.IntMap.Strict as IntMap
-import Gargantext.Core.Types (Terms(..))
import Gargantext.Text.Context
import Gargantext.Text.Terms.Mono (monoTextsBySentence)
+import Prelude (error)
import Gargantext.Prelude
-import Data.List (concatMap)
+import Data.List (null, concatMap)
import Data.Ord
-import qualified Data.Set as Set
------------------------------------------------------------------------
data Pattern = Pattern
- { _pat_table :: !(KMP.Table Term)
+ { _pat_table :: !(KMP.Table Text)
, _pat_length :: !Int
- , _pat_terms :: !Terms
+ , _pat_terms :: ![Text]
}
type Patterns = [Pattern]
------------------------------------------------------------------------
-replaceTerms :: Patterns -> Sentence Term -> Sentence Terms
+replaceTerms :: Patterns -> [Text] -> [[Text]]
replaceTerms pats terms = go 0
where
terms_len = length terms
m =
IntMap.fromListWith merge
[ (ix, (len, term))
- | Pattern pat len term <- pats, ix <- KMP.match pat (_terms_label term) ]
+ | Pattern pat len term <- pats, ix <- KMP.match pat terms ]
buildPatterns :: TermList -> Patterns
buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
where
buildPattern (label, alts) = map f (label : alts)
where
- f alt = Pattern (KMP.build alt) (length alt)
- (Terms label $ Set.empty) -- TODO check stems
+ f alt | "" `elem` alt = error "buildPatterns: ERR1"
+ | null alt = error "buildPatterns: ERR2"
+ | otherwise =
+ Pattern (KMP.build alt) (length alt) label
+ --(Terms label $ Set.empty) -- TODO check stems
-extractTermsWithList :: Patterns -> Text -> Corpus Terms
+extractTermsWithList :: Patterns -> Text -> Corpus [Text]
extractTermsWithList pats = map (replaceTerms pats) . monoTextsBySentence