where
import Control.Lens hiding (both) -- ((^.), view, over, set, (_1), (_2))
+import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
-import Data.Maybe (catMaybes)
import Data.Monoid (mempty)
import Data.Ord (Down(..))
import Data.Set (Set)
-import Data.Text (Text)
import Data.Tuple.Extra (both)
-import Gargantext.API.Ngrams.Types (NgramsElement)
-import Gargantext.API.Ngrams.Types (RepoCmdM)
+import Gargantext.API.Ngrams.Types (NgramsElement, RepoCmdM, NgramsTerm(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Group
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Group.WithStem
import Gargantext.Core.Text.List.Social
import Gargantext.Core.Text.List.Social.Prelude
-import Gargantext.Core.Text.Metrics (scored', Scored(..), scored_speExc, scored_genInc, normalizeGlobal, normalizeLocal)
+import Gargantext.Core.Text.Metrics (scored', Scored(..), scored_speExc, scored_genInc, normalizeGlobal, normalizeLocal, scored_terms)
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, getNodesByNgramsOnlyUser)
import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
-import qualified Data.Char as Char
+import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
-import qualified Data.Text as Text
+import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
+
+{-
+-- TODO maybe useful for later
+isStopTerm :: StopSize -> Text -> Bool
+isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x)
+ where
+ isStopChar c = not (c `elem` ("- /()%" :: [Char]) || Char.isAlpha c)
+-}
-- | TODO improve grouping functions of Authors, Sources, Institutes..
-> GroupParams
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement])
-buildNgramsOthersList user uCid groupParams (nt, MapListSize mapListSize) = do
- allTerms :: Map Text (Set NodeId) <- getNodesByNgramsUser uCid nt
+buildNgramsOthersList user uCid _groupParams (nt, MapListSize mapListSize) = do
+ allTerms :: HashMap NgramsTerm (Set NodeId) <- getNodesByNgramsUser uCid nt
-- | PrivateFirst for first developments since Public NodeMode is not implemented yet
- socialLists' :: FlowCont Text FlowListScores
- <- flowSocialList' MySelfFirst user nt ( FlowCont Map.empty
- $ Map.fromList
- $ List.zip (Map.keys allTerms)
+ socialLists :: FlowCont NgramsTerm FlowListScores
+ <- flowSocialList MySelfFirst user nt ( FlowCont HashMap.empty
+ $ HashMap.fromList
+ $ List.zip (HashMap.keys allTerms)
(List.cycle [mempty])
)
{-
- printDebug "flowSocialList'"
- $ Map.filter (not . ((==) Map.empty) . (view fls_parents))
- $ view flc_scores socialLists'
+ if nt == Sources -- Authors
+ then printDebug "flowSocialList" socialLists
+ else printDebug "flowSocialList" ""
-}
-
let
- groupedWithList = toGroupedTree groupParams socialLists' allTerms
-
+ groupedWithList = toGroupedTree {- groupParams -} socialLists allTerms
{-
- printDebug "groupedWithList"
- $ view flc_cont groupedWithList
+ if nt == Sources -- Authors
+ then printDebug "groupedWithList" groupedWithList
+ else printDebug "groupedWithList" ""
-}
let
- (stopTerms, tailTerms) = Map.partition ((== Just StopTerm) . viewListType) $ view flc_scores groupedWithList
- (mapTerms, tailTerms') = Map.partition ((== Just MapTerm) . viewListType) tailTerms
+ (stopTerms, tailTerms) = HashMap.partition ((== Just StopTerm) . viewListType)
+ $ view flc_scores groupedWithList
+
+ (mapTerms, tailTerms') = HashMap.partition ((== Just MapTerm) . viewListType) tailTerms
listSize = mapListSize - (List.length mapTerms)
- (mapTerms', candiTerms) = both Map.fromList
+ (mapTerms', candiTerms) = both HashMap.fromList
$ List.splitAt listSize
$ List.sortOn (Down . viewScore . snd)
- $ Map.toList tailTerms'
+ $ HashMap.toList tailTerms'
pure $ Map.fromList [( nt, (toNgramsElement stopTerms)
- <> (toNgramsElement mapTerms )
- <> (toNgramsElement $ setListType (Just MapTerm ) mapTerms' )
- <> (toNgramsElement $ setListType (Just CandidateTerm) candiTerms)
- )]
+ <> (toNgramsElement mapTerms )
+ <> (toNgramsElement $ setListType (Just MapTerm ) mapTerms' )
+ <> (toNgramsElement $ setListType (Just CandidateTerm) candiTerms)
+ )]
-- TODO use ListIds
-> GroupParams
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement])
-buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
+buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
+-- | Filter 0 With Double
-- Computing global speGen score
- allTerms :: Map Text Double <- getTficf uCid mCid nt
+ allTerms :: HashMap NgramsTerm Double <- getTficf uCid mCid nt
-- | PrivateFirst for first developments since Public NodeMode is not implemented yet
- socialLists' :: FlowCont Text FlowListScores
- <- flowSocialList' MySelfFirst user nt ( FlowCont Map.empty
- $ Map.fromList
- $ List.zip (Map.keys allTerms)
- (List.cycle [mempty])
+ socialLists :: FlowCont NgramsTerm FlowListScores
+ <- flowSocialList MySelfFirst user nt ( FlowCont HashMap.empty
+ $ HashMap.fromList
+ $ List.zip (HashMap.keys allTerms)
+ (List.cycle [mempty])
)
-{-
- printDebug "flowSocialList'"
- $ Map.filter (not . ((==) Map.empty) . (view fls_parents))
- $ view flc_scores socialLists'
-
--}
- let groupedWithList = toGroupedTree groupParams socialLists' allTerms
-{-
--- TODO remove
--- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
- -- First remove stops terms
- socialLists <- flowSocialList user nt (Set.fromList $ map fst $ Map.toList allTerms)
+ let socialLists_Stemmed = addScoreStem groupParams (HashMap.keysSet allTerms) socialLists
+ printDebug "socialLists_Stemmed" socialLists_Stemmed
+ let groupedWithList = toGroupedTree {- groupParams -} socialLists_Stemmed allTerms
+ (stopTerms, candidateTerms) = HashMap.partition ((== Just StopTerm) . viewListType)
+ $ view flc_scores groupedWithList
- -- Grouping the ngrams and keeping the maximum score for label
- let grouped = groupedTextWithStem ( GroupedTextParams (groupWith groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms
+ (groupedMono, groupedMult) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) candidateTerms
- groupedWithList = map (addListType (invertForw socialLists)) grouped
--- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
--}
+ -- printDebug "stopTerms" stopTerms
- (stopTerms, candidateTerms) = Map.partition ((== Just StopTerm) . viewListType)
- $ view flc_scores groupedWithList
- -- (groupedMono, groupedMult) = Map.partition (\t -> t ^. gt_size < 2) candidateTerms
- (groupedMono, groupedMult) = Map.partitionWithKey (\t _v -> size t < 2) candidateTerms
-
- -- printDebug "\n * stopTerms * \n" stopTerms
-- splitting monterms and multiterms to take proportional candidates
let
- listSizeGlobal = 2000 :: Double -- use % of list if to big, or Int if too small
+ -- use % of list if to big, or Int if too small
+ listSizeGlobal = 2000 :: Double
monoSize = 0.4 :: Double
multSize = 1 - monoSize
- splitAt n' ns = both (Map.fromListWith (<>))
+ splitAt n' ns = both (HashMap.fromListWith (<>))
$ List.splitAt (round $ n' * listSizeGlobal)
$ List.sortOn (viewScore . snd)
- $ Map.toList ns
+ $ HashMap.toList ns
(groupedMonoHead, groupedMonoTail) = splitAt monoSize groupedMono
(groupedMultHead, groupedMultTail) = splitAt multSize groupedMult
- -- printDebug "groupedMonoHead" (List.length groupedMonoHead)
- -- printDebug "groupedMonoTail" (List.length groupedMonoHead)
- -- printDebug "groupedMultHead" (List.length groupedMultHead)
- -- printDebug "groupedMultTail" (List.length groupedMultTail)
-
-{-
- let
- -- Get Local Scores now for selected grouped ngrams
- -- TODO HasTerms
- selectedTerms = Set.toList $ List.foldl'
- (\set' (GroupedText _ l' _ g _ _ _ ) -> Set.union set'
- $ Set.insert l' g
- )
- Set.empty
- (groupedMonoHead <> groupedMultHead)
--}
+-------------------------
+-- Filter 1 With Set NodeId and SpeGen
selectedTerms = Set.toList $ hasTerms (groupedMonoHead <> groupedMultHead)
- -- TO remove (and remove HasNodeError instance)
+
+
+ -- TO remove (and remove HasNodeError instance)
userListId <- defaultList uCid
masterListId <- defaultList mCid
+
mapTextDocIds <- getNodesByNgramsOnlyUser uCid
[userListId, masterListId]
nt
selectedTerms
- -- TODO
- let
- groupedTreeScores_SetNodeId = setScoresWith mapTextDocIds (groupedMonoHead <> groupedMultHead)
-
-{-
--- TODO remove
--- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
let
- mapGroups = Map.fromList
- $ map (\g -> (g ^. gt_stem, g))
- $ groupedMonoHead <> groupedMultHead
-
- -- grouping with Set NodeId
- contextsAdded = foldl' (\mapGroups' k ->
- let k' = groupWith groupParams k in
- case Map.lookup k' mapGroups' of
- Nothing -> mapGroups'
- Just g -> case Map.lookup k mapTextDocIds of
- Nothing -> mapGroups'
- Just ns -> Map.insert k' (over gt_nodes (Set.union ns) g) mapGroups'
- )
- mapGroups
- $ Map.keys mapTextDocIds
--}
+ groupedTreeScores_SetNodeId :: HashMap NgramsTerm (GroupedTreeScores (Set NodeId))
+ groupedTreeScores_SetNodeId = setScoresWithMap mapTextDocIds (groupedMonoHead <> groupedMultHead)
-- | Coocurrences computation
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
- let mapCooc = Map.filter (>2)
- $ Map.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2)
+ let mapCooc = HashMap.filter (>2)
+ $ HashMap.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2)
| (t1, s1) <- mapStemNodeIds
, (t2, s2) <- mapStemNodeIds
]
where
- mapStemNodeIds = Map.toList
- $ Map.map viewScores
+ mapStemNodeIds = HashMap.toList
+ $ HashMap.map viewScores
$ groupedTreeScores_SetNodeId
let
-- computing scores
- mapScores f = Map.fromList
- $ map (\s@(Scored t g s') -> (t, f s))
+ mapScores f = HashMap.fromList
+ $ map (\g -> (view scored_terms g, f g))
$ normalizeGlobal
$ map normalizeLocal
- $ scored' mapCooc
+ $ scored'
+ $ Map.fromList -- TODO remove this
+ $ HashMap.toList mapCooc
let
- -- groupedTreeScores_SpeGen :: GroupedTreeScores (Scored Double)
- groupedTreeScores_SpeGen = setScoresWith (mapScores identity) (groupedMonoHead <> groupedMultHead)
-
-{-
--- TODO remove
--- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
- groupsWithScores = catMaybes
- $ map (\(stem, g)
- -> case Map.lookup stem mapScores' of
- Nothing -> Nothing
- Just s' -> set gts'_score s' g
- ) $ Map.toList $ view flc_scores contextsAdded
- where
- mapScores' = mapScores identity
--- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
--- TODO remove
--}
- -- adapt2 TOCHECK with DC
- -- printDebug "groupsWithScores" groupsWithScores
--- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
--- TODO remove
---}
+ groupedTreeScores_SpeGen :: HashMap NgramsTerm (GroupedTreeScores (Scored NgramsTerm))
+ groupedTreeScores_SpeGen = setScoresWithMap (mapScores identity)
+ ( groupedMonoHead
+ <> groupedMultHead
+ )
let
-- sort / partition / split
- -- filter mono/multi again
- (monoScored, multScored) = Map.partitionWithKey (\t _v -> size t < 2) groupedTreeScores_SpeGen
- -- (monoScored, multScored) = List.partition (\g -> _gt_size g < 2) groupsWithScores
+ -- filter mono/multi again
+ (monoScored, multScored) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) groupedTreeScores_SpeGen
-- filter with max score
- -- partitionWithMaxScore = List.partition (\g -> let (s1,s2) = viewScore g in s1 > s2 )
- partitionWithMaxScore = Map.partition (\g -> (view scored_genInc $ view gts'_score g)
+ partitionWithMaxScore = HashMap.partition (\g -> (view scored_genInc $ view gts'_score g)
> (view scored_speExc $ view gts'_score g)
)
(monoScoredIncl, monoScoredExcl) = partitionWithMaxScore monoScored
(multScoredIncl, multScoredExcl) = partitionWithMaxScore multScored
- -- splitAt
+ -- splitAt
let
- listSizeLocal = 1000 :: Double -- use % of list if to big, or Int if to small
+ -- use % of list if to big, or Int if to small
+ listSizeLocal = 1000 :: Double
inclSize = 0.4 :: Double
exclSize = 1 - inclSize
- --splitAt' n' = (both (Map.fromListWith (<>))) . (List.splitAt (round $ n' * listSizeLocal))
- splitAt' n' = (both (Map.fromList)) . (List.splitAt (round $ n' * listSizeLocal))
-
- sortOn f = (List.sortOn (Down . f . _gts'_score . snd)) . Map.toList
- --sortOn f = (List.sortOn (Down . (gts'_score))) . Map.toList
- -- sort = (List.sortOn (Down . viewScore))
+ splitAt' n' = (both (HashMap.fromList)) . (List.splitAt (round $ n' * listSizeLocal))
+ sortOn f = (List.sortOn (Down . (view (gts'_score . f)) . snd)) . HashMap.toList
- (monoScoredInclHead, monoScoredInclTail) = splitAt' (monoSize * inclSize / 2) $ (sortOn _scored_genInc) monoScoredIncl
- (monoScoredExclHead, monoScoredExclTail) = splitAt' (monoSize * inclSize / 2) $ (sortOn _scored_speExc) monoScoredExcl
- (multScoredInclHead, multScoredInclTail) = splitAt' (multSize * exclSize / 2) $ (sortOn _scored_genInc) multScoredIncl
- (multScoredExclHead, multScoredExclTail) = splitAt' (multSize * exclSize / 2) $ (sortOn _scored_speExc) multScoredExcl
+ monoInc_size = splitAt' $ monoSize * inclSize / 2
+ (monoScoredInclHead, monoScoredInclTail) = monoInc_size $ (sortOn scored_genInc) monoScoredIncl
+ (monoScoredExclHead, monoScoredExclTail) = monoInc_size $ (sortOn scored_speExc) monoScoredExcl
+ multExc_size = splitAt' $ multSize * exclSize / 2
+ (multScoredInclHead, multScoredInclTail) = multExc_size $ (sortOn scored_genInc) multScoredIncl
+ (multScoredExclHead, multScoredExclTail) = multExc_size $ (sortOn scored_speExc) multScoredExcl
+------------------------------------------------------------
-- Final Step building the Typed list
termListHead = maps <> cands
where
maps = setListType (Just MapTerm)
- $ monoScoredInclHead
- <> monoScoredExclHead
- <> multScoredInclHead
- <> multScoredExclHead
+ $ monoScoredInclHead
+ <> monoScoredExclHead
+ <> multScoredInclHead
+ <> multScoredExclHead
cands = setListType (Just CandidateTerm)
- $ monoScoredInclTail
- <> monoScoredExclTail
- <> multScoredInclTail
- <> multScoredExclTail
+ $ monoScoredInclTail
+ <> monoScoredExclTail
+ <> multScoredInclTail
+ <> multScoredExclTail
termListTail = (setListType (Just CandidateTerm)) (groupedMonoTail <> groupedMultTail)
--- printDebug "monoScoredInclHead" monoScoredInclHead
--- printDebug "monoScoredExclHead" monoScoredExclTail
--- printDebug "multScoredInclHead" multScoredInclHead
--- printDebug "multScoredExclTail" multScoredExclTail
-
let result = Map.unionsWith (<>)
[ Map.fromList [( nt, toNgramsElement termListHead
<> toNgramsElement termListTail
<> toNgramsElement stopTerms
)]
]
- -- printDebug "\n result \n" r
-
- pure result
-
+ -- printDebug "result" result
-toGargList :: Maybe ListType -> b -> (Maybe ListType, b)
-toGargList l n = (l,n)
-
-
-isStopTerm :: StopSize -> Text -> Bool
-isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x)
- where
- isStopChar c = not (c `elem` ("- /()%" :: [Char]) || Char.isAlpha c)
-
-------------------------------------------------------------------------------
+ pure result