module Gargantext.Viz.Phylo.Aggregates.Document
where
-import Data.List (last,head)
-import Data.Map (Map)
-import Data.Text (Text, unwords, toLower, words)
-import Data.Tuple (fst, snd)
-import Data.Tuple.Extra
+import Data.Map (Map,fromListWith)
+import Data.Text (Text)
+import Data.Tuple (fst)
import Data.Vector (Vector)
-
-import Gargantext.Prelude hiding (head)
-import Gargantext.Text.Terms.Mono (monoTexts)
+import Gargantext.Prelude
+import Gargantext.Text.Terms.Mono (monoTexts)
import Gargantext.Viz.Phylo
-import Gargantext.Viz.Phylo.Tools
-
import qualified Data.List as List
import qualified Data.Map as Map
-import qualified Data.Vector as Vector
+import qualified Data.Vector as Vector
+
+import Debug.Trace (trace)
--- | To init a set of periods out of a given Grain and Step
-docsToPeriods :: (Ord date, Enum date) => (doc -> date)
- -> Grain -> Step -> [doc] -> Map (date, date) [doc]
-docsToPeriods _ _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
-docsToPeriods f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
+-- | To init a list of Periods framed by a starting Date and an ending Date
+initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
+initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l))
+ $ chunkAlong g s [start .. end]
+
+
+-- | To group a list of Documents by fixed periods
+groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
+groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
+groupDocsByPeriod f pds es = trace ("----\nGroup docs by periods\n") $ Map.fromList $ zip pds $ map (inPeriode f es) pds
where
- --------------------------------------
- hs = steps g s $ both f (head es, last es)
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ List.partition (\d -> f' d >= start && f' d <= end) h
- --------------------------------------
- steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
- steps s' o' (start,end) = map (\l -> (head l, last l))
- $ chunkAlong s' o' [start .. end]
- --------------------------------------
+ --------------------------------------
--- | To parse a list of Documents by filtering on a Vector of Ngrams
-parseDocs :: PhyloNgrams -> [Document] -> [Document]
-parseDocs l docs = map (\(Document d t)
- -> Document d ( unwords
- $ filter (\x -> Vector.elem x l)
- $ monoTexts t)) docs
+-- | To parse a list of Documents by filtering on a Vector of Ngrams
+parseDocs :: Vector Ngrams -> [(Date,Text)] -> [Document]
+parseDocs roots c = map (\(d,t)
+ -> Document d ( filter (\x -> Vector.elem x roots)
+ $ monoTexts t)) c
+
+-- | To count the number of documents by year
+countDocs :: [(Date,a)] -> Map Date Double
+countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
--- | To group a list of Documents by fixed periods
-groupDocsByPeriod :: Grain -> Step -> [Document] -> PhyloNgrams -> Map (Date, Date) [Document]
-groupDocsByPeriod g s docs ngrams = docsToPeriods date g s $ parseDocs ngrams docs
--- | To transform a corpus of texts into a structured list of Documents
-corpusToDocs :: [(Date, Text)] -> [Document]
-corpusToDocs l = map (\(d,t) -> Document d t) l
\ No newline at end of file