]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Group.hs
Merge branch 'dev-phylo' into dev
[gargantext.git] / src / Gargantext / Core / Text / Group.hs
1 {-|
2 Module : Gargantext.Core.Text.Group
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13
14 module Gargantext.Core.Text.Group
15 where
16
17 import Control.Lens (makeLenses, set)
18 import Data.Set (Set)
19 import Data.Map (Map)
20 import Data.Text (Text)
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Core.Text (size)
23 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
24 import Gargantext.Database.Admin.Types.Node (NodeId)
25 -- import Gargantext.Core.Text.List.Learn (Model(..))
26 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
27 import Gargantext.Prelude
28 import qualified Data.Set as Set
29 import qualified Data.Map as Map
30 import qualified Data.List as List
31 import qualified Data.Text as Text
32
33 {-
34 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
35 , stemX :: !Int
36 , stopSize :: !StopSize
37 }
38 | BuilderStep1 { withModel :: !Model }
39 | BuilderStepN { withModel :: !Model }
40 | Tficf { nlb_lang :: !Lang
41 , nlb_group1 :: !Int
42 , nlb_group2 :: !Int
43 , nlb_stopSize :: !StopSize
44 , nlb_userCorpusId :: !UserCorpusId
45 , nlb_masterCorpusId :: !MasterCorpusId
46 }
47 -}
48
49 data StopSize = StopSize {unStopSize :: !Int}
50
51 -- | TODO: group with 2 terms only can be
52 -- discussed. Main purpose of this is offering
53 -- a first grouping option to user and get some
54 -- enriched data to better learn and improve that algo
55 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
56 , unGroupParams_len :: !Int
57 , unGroupParams_limit :: !Int
58 , unGroupParams_stopSize :: !StopSize
59 }
60 | GroupIdentity
61
62 ngramsGroup :: GroupParams
63 -> Text
64 -> Text
65 ngramsGroup GroupIdentity = identity
66 ngramsGroup (GroupParams l _m _n _) =
67 Text.intercalate " "
68 . map (stem l)
69 -- . take n
70 . List.sort
71 -- . (List.filter (\t -> Text.length t > m))
72 . Text.splitOn " "
73 . Text.replace "-" " "
74
75 ------------------------------------------------------------------------
76 mergeMapParent :: Map Text (GroupedText b)
77 -> Map Text (Map Text Int)
78 -> Map Text (GroupedText b)
79 mergeMapParent = undefined
80
81 ------------------------------------------------------------------------
82 toGroupedText :: Ord b
83 => (Text -> Text )
84 -> (a -> b )
85 -> (a -> Set Text )
86 -> (a -> Set NodeId)
87 -> [(Text,a)]
88 -> Map Stem (GroupedText b)
89 toGroupedText fun_stem fun_score fun_texts fun_nodeIds from = groupStems' $ map group from
90 where
91 group (t,d) = let t' = fun_stem t
92 in (t', GroupedText
93 Nothing
94 t
95 (fun_score d)
96 (fun_texts d)
97 (size t)
98 t'
99 (fun_nodeIds d)
100 )
101
102 groupStems :: [(Stem, GroupedText Double)] -> [GroupedText Double]
103 groupStems = Map.elems . groupStems'
104
105 groupStems' :: Ord a => [(Stem, GroupedText a)] -> Map Stem (GroupedText a)
106 groupStems' = Map.fromListWith grouping
107 where
108 grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
109 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
110 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
111 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
112 where
113 lt = lt1 <> lt2
114 gr = Set.union group1 group2
115 nodes = Set.union nodes1 nodes2
116
117 ------------------------------------------------------------------------
118 type Group = Lang -> Int -> Int -> Text -> Text
119 type Stem = Text
120 type Label = Text
121 data GroupedText score =
122 GroupedText { _gt_listType :: !(Maybe ListType)
123 , _gt_label :: !Label
124 , _gt_score :: !score
125 , _gt_children :: !(Set Text)
126 , _gt_size :: !Int
127 , _gt_stem :: !Stem
128 , _gt_nodes :: !(Set NodeId)
129 } {-deriving Show--}
130 --{-
131 instance Show score => Show (GroupedText score) where
132 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
133 --}
134
135 instance (Eq a) => Eq (GroupedText a) where
136 (==) (GroupedText _ _ score1 _ _ _ _)
137 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
138
139 instance (Eq a, Ord a) => Ord (GroupedText a) where
140 compare (GroupedText _ _ score1 _ _ _ _)
141 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
142
143 -- Lenses Instances
144 makeLenses 'GroupedText
145
146 ------------------------------------------------------------------------
147 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
148 addListType m g = set gt_listType (hasListType m g) g
149 where
150 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
151 hasListType m' (GroupedText _ label _ g' _ _ _) =
152 List.foldl' (<>) Nothing
153 $ map (\t -> Map.lookup t m')
154 $ Set.toList
155 $ Set.insert label g'