2 Module : Gargantext.Core.Text.Group
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
14 module Gargantext.Core.Text.Group
17 import Control.Lens (makeLenses, set)
20 import Data.Text (Text)
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Core.Types (ListType(..))
23 import Gargantext.Database.Admin.Types.Node (NodeId)
24 import Gargantext.Core.Text.List.Learn (Model(..))
25 import Gargantext.Core.Types (MasterCorpusId, UserCorpusId)
26 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
27 import Gargantext.Prelude
28 import qualified Data.Set as Set
29 import qualified Data.Map as Map
30 import qualified Data.List as List
31 import qualified Data.Text as Text
33 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
35 , stopSize :: !StopSize
37 | BuilderStep1 { withModel :: !Model }
38 | BuilderStepN { withModel :: !Model }
39 | Tficf { nlb_lang :: !Lang
42 , nlb_stopSize :: !StopSize
43 , nlb_userCorpusId :: !UserCorpusId
44 , nlb_masterCorpusId :: !MasterCorpusId
47 data StopSize = StopSize {unStopSize :: !Int}
49 -- | TODO: group with 2 terms only can be
50 -- discussed. Main purpose of this is offering
51 -- a first grouping option to user and get some
52 -- enriched data to better learn and improve that algo
54 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
55 , unGroupParams_len :: !Int
56 , unGroupParams_limit :: !Int
57 , unGroupParams_stopSize :: !StopSize
60 ngramsGroup :: GroupParams
63 ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
67 -- . (List.filter (\t -> Text.length t > m))
69 . Text.replace "-" " "
71 ------------------------------------------------------------------------------
72 type Group = Lang -> Int -> Int -> Text -> Text
75 data GroupedText score =
76 GroupedText { _gt_listType :: !(Maybe ListType)
79 , _gt_group :: !(Set Text)
82 , _gt_nodes :: !(Set NodeId)
84 instance Show score => Show (GroupedText score) where
85 show (GroupedText _ l s _ _ _ _) = show l <> ":" <> show s
87 instance (Eq a) => Eq (GroupedText a) where
88 (==) (GroupedText _ _ score1 _ _ _ _)
89 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
91 instance (Eq a, Ord a) => Ord (GroupedText a) where
92 compare (GroupedText _ _ score1 _ _ _ _)
93 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
96 makeLenses 'GroupedText
98 ------------------------------------------------------------------------------
99 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
100 addListType m g = set gt_listType lt g
104 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
105 hasListType m (GroupedText _ label _ g _ _ _) =
106 List.foldl' (<>) Nothing
107 $ map (\t -> Map.lookup t m)