2 Module : Gargantext.Core.Text.Group
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
14 module Gargantext.Core.Text.Group
17 import Control.Lens (makeLenses, set)
20 import Data.Text (Text)
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Core.Text (size)
23 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
24 import Gargantext.Database.Admin.Types.Node (NodeId)
25 -- import Gargantext.Core.Text.List.Learn (Model(..))
26 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
27 import Gargantext.Prelude
28 import qualified Data.Set as Set
29 import qualified Data.Map as Map
30 import qualified Data.List as List
31 import qualified Data.Text as Text
34 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
36 , stopSize :: !StopSize
38 | BuilderStep1 { withModel :: !Model }
39 | BuilderStepN { withModel :: !Model }
40 | Tficf { nlb_lang :: !Lang
43 , nlb_stopSize :: !StopSize
44 , nlb_userCorpusId :: !UserCorpusId
45 , nlb_masterCorpusId :: !MasterCorpusId
49 data StopSize = StopSize {unStopSize :: !Int}
51 -- | TODO: group with 2 terms only can be
52 -- discussed. Main purpose of this is offering
53 -- a first grouping option to user and get some
54 -- enriched data to better learn and improve that algo
55 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
56 , unGroupParams_len :: !Int
57 , unGroupParams_limit :: !Int
58 , unGroupParams_stopSize :: !StopSize
62 ngramsGroup :: GroupParams
65 ngramsGroup GroupIdentity = identity
66 ngramsGroup (GroupParams l _m _n _) =
71 -- . (List.filter (\t -> Text.length t > m))
73 . Text.replace "-" " "
75 ------------------------------------------------------------------------
76 mergeMapParent :: Map Text (GroupedText b)
77 -> Map Text (Map Text Int)
78 -> Map Text (GroupedText b)
79 mergeMapParent = undefined
81 ------------------------------------------------------------------------
82 toGroupedText :: Ord b
88 -> Map Stem (GroupedText b)
89 toGroupedText fun_stem fun_score fun_texts fun_nodeIds from = groupStems' $ map group from
91 group (t,d) = let t' = fun_stem t
102 groupStems :: [(Stem, GroupedText Double)] -> [GroupedText Double]
103 groupStems = Map.elems . groupStems'
105 groupStems' :: Ord a => [(Stem, GroupedText a)] -> Map Stem (GroupedText a)
106 groupStems' = Map.fromListWith grouping
108 grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
109 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
110 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
111 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
114 gr = Set.union group1 group2
115 nodes = Set.union nodes1 nodes2
117 ------------------------------------------------------------------------
118 type Group = Lang -> Int -> Int -> Text -> Text
121 data GroupedText score =
122 GroupedText { _gt_listType :: !(Maybe ListType)
123 , _gt_label :: !Label
124 , _gt_score :: !score
125 , _gt_children :: !(Set Text)
128 , _gt_nodes :: !(Set NodeId)
131 instance Show score => Show (GroupedText score) where
132 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
135 instance (Eq a) => Eq (GroupedText a) where
136 (==) (GroupedText _ _ score1 _ _ _ _)
137 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
139 instance (Eq a, Ord a) => Ord (GroupedText a) where
140 compare (GroupedText _ _ score1 _ _ _ _)
141 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
144 makeLenses 'GroupedText
146 ------------------------------------------------------------------------
147 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
148 addListType m g = set gt_listType (hasListType m g) g
150 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
151 hasListType m' (GroupedText _ label _ g' _ _ _) =
152 List.foldl' (<>) Nothing
153 $ map (\t -> Map.lookup t m')
155 $ Set.insert label g'