2 Module : Gargantext.Core.Text.Group
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
14 module Gargantext.Core.Text.Group
17 import Control.Lens (makeLenses, set)
20 import Data.Text (Text)
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Core.Text (size)
23 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
24 import Gargantext.Database.Admin.Types.Node (NodeId)
25 -- import Gargantext.Core.Text.List.Learn (Model(..))
26 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
27 import Gargantext.Prelude
28 import qualified Data.Set as Set
29 import qualified Data.Map as Map
30 import qualified Data.List as List
31 import qualified Data.Text as Text
34 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
36 , stopSize :: !StopSize
38 | BuilderStep1 { withModel :: !Model }
39 | BuilderStepN { withModel :: !Model }
40 | Tficf { nlb_lang :: !Lang
43 , nlb_stopSize :: !StopSize
44 , nlb_userCorpusId :: !UserCorpusId
45 , nlb_masterCorpusId :: !MasterCorpusId
49 data StopSize = StopSize {unStopSize :: !Int}
51 -- | TODO: group with 2 terms only can be
52 -- discussed. Main purpose of this is offering
53 -- a first grouping option to user and get some
54 -- enriched data to better learn and improve that algo
55 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
56 , unGroupParams_len :: !Int
57 , unGroupParams_limit :: !Int
58 , unGroupParams_stopSize :: !StopSize
62 ngramsGroup :: GroupParams
65 ngramsGroup GroupIdentity = identity
66 ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
70 -- . (List.filter (\t -> Text.length t > m))
72 . Text.replace "-" " "
74 ------------------------------------------------------------------------
75 mergeMapParent :: Map Text (GroupedText b)
76 -> Map Text (Map Text Int)
77 -> Map Text (GroupedText b)
78 mergeMapParent = undefined
80 ------------------------------------------------------------------------
81 toGroupedText :: Ord b
87 -> Map Stem (GroupedText b)
88 toGroupedText fun_stem fun_score fun_texts fun_nodeIds from = groupStems' $ map group from
90 group (t,d) = let t' = fun_stem t
101 groupStems :: [(Stem, GroupedText Double)] -> [GroupedText Double]
102 groupStems = Map.elems . groupStems'
104 groupStems' :: Ord a => [(Stem, GroupedText a)] -> Map Stem (GroupedText a)
105 groupStems' = Map.fromListWith grouping
107 grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
108 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
109 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
110 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
113 gr = Set.union group1 group2
114 nodes = Set.union nodes1 nodes2
116 ------------------------------------------------------------------------
117 type Group = Lang -> Int -> Int -> Text -> Text
120 data GroupedText score =
121 GroupedText { _gt_listType :: !(Maybe ListType)
122 , _gt_label :: !Label
123 , _gt_score :: !score
124 , _gt_children :: !(Set Text)
127 , _gt_nodes :: !(Set NodeId)
130 instance Show score => Show (GroupedText score) where
131 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
134 instance (Eq a) => Eq (GroupedText a) where
135 (==) (GroupedText _ _ score1 _ _ _ _)
136 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
138 instance (Eq a, Ord a) => Ord (GroupedText a) where
139 compare (GroupedText _ _ score1 _ _ _ _)
140 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
143 makeLenses 'GroupedText
145 ------------------------------------------------------------------------
146 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
147 addListType m g = set gt_listType (hasListType m g) g
149 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
150 hasListType m' (GroupedText _ label _ g' _ _ _) =
151 List.foldl' (<>) Nothing
152 $ map (\t -> Map.lookup t m')
154 $ Set.insert label g'