2 Module : Gargantext.Core.Text.Group
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
14 module Gargantext.Core.Text.Group
17 import Control.Lens (makeLenses, set)
20 import Data.Text (Text)
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Core.Types (ListType(..))
23 import Gargantext.Database.Admin.Types.Node (NodeId)
24 import Gargantext.Core.Text.List.Learn (Model(..))
25 import Gargantext.Core.Types (MasterCorpusId, UserCorpusId)
26 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
27 import Gargantext.Prelude
28 import qualified Data.Set as Set
29 import qualified Data.Map as Map
30 import qualified Data.List as List
31 import qualified Data.Text as Text
33 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
35 , stopSize :: !StopSize
37 | BuilderStep1 { withModel :: !Model }
38 | BuilderStepN { withModel :: !Model }
39 | Tficf { nlb_lang :: !Lang
42 , nlb_stopSize :: !StopSize
43 , nlb_userCorpusId :: !UserCorpusId
44 , nlb_masterCorpusId :: !MasterCorpusId
47 data StopSize = StopSize {unStopSize :: !Int}
49 -- | TODO: group with 2 terms only can be
50 -- discussed. Main purpose of this is offering
51 -- a first grouping option to user and get some
52 -- enriched data to better learn and improve that algo
60 ngramsGroup l _m _n = Text.intercalate " "
64 -- . (List.filter (\t -> Text.length t > m))
66 . Text.replace "-" " "
68 ------------------------------------------------------------------------------
69 type Group = Lang -> Int -> Int -> Text -> Text
72 data GroupedText score =
73 GroupedText { _gt_listType :: !(Maybe ListType)
76 , _gt_group :: !(Set Text)
79 , _gt_nodes :: !(Set NodeId)
81 instance Show score => Show (GroupedText score) where
82 show (GroupedText _ l s _ _ _ _) = show l <> ":" <> show s
84 instance (Eq a) => Eq (GroupedText a) where
85 (==) (GroupedText _ _ score1 _ _ _ _)
86 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
88 instance (Eq a, Ord a) => Ord (GroupedText a) where
89 compare (GroupedText _ _ score1 _ _ _ _)
90 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
93 makeLenses 'GroupedText
95 ------------------------------------------------------------------------------
96 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
97 addListType m g = set gt_listType lt g
101 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
102 hasListType m (GroupedText _ label _ g _ _ _) =
103 List.foldl' (<>) Nothing
104 $ map (\t -> Map.lookup t m)