2 Module : Gargantext.Core.Text.List.Learn
3 Description : Learn to make lists
4 Copyright : (c) CNRS, 2018-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 CSV parser for Gargantext corpus files.
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
17 module Gargantext.Core.Text.List.Learn
20 import qualified Data.IntMap as IntMap
21 import qualified Data.List as List
22 import Data.Map.Strict (Map)
23 import qualified Data.Map.Strict as Map
24 import qualified Data.SVM as SVM
25 import qualified Data.Vector as Vec
27 import Gargantext.Core
28 import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
29 import Gargantext.Core.Types.Main (ListType(..))
30 import Gargantext.Prelude
31 import Gargantext.Database.GargDB
33 ------------------------------------------------------------------------
34 train :: Double -> Double -> SVM.Problem -> IO SVM.Model
35 train x y = (SVM.train (SVM.CSvc x) (SVM.RBF y))
37 predict :: SVM.Model -> [Vec.Vector Double] -> IO [Double]
38 predict m vs = mapM (predict' m) vs
40 predict' m' vs' = SVM.predict m' (IntMap.fromList $ (zip [1..]) $ Vec.toList vs')
42 ------------------------------------------------------------------------
43 trainList :: Double -> Double -> Map ListType [Vec.Vector Double] -> IO SVM.Model
44 trainList x y = (train x y) . trainList'
46 trainList' :: Map ListType [Vec.Vector Double] -> SVM.Problem
47 trainList' = mapVec2problem . (Map.mapKeys (fromIntegral . toDBid))
49 mapVec2problem :: Map Double [Vec.Vector Double] -> SVM.Problem
50 mapVec2problem = List.concat . (map (\(a,as) -> zip (repeat a) as)) . Map.toList . (Map.map vecs2maps)
52 vecs2maps :: [Vec.Vector Double] -> [IntMap.IntMap Double]
53 vecs2maps = map (IntMap.fromList . (zip [1..]) . Vec.toList)
56 predictList :: Model -> [Vec.Vector Double] -> IO [Maybe ListType]
57 predictList (ModelSVM m _ _) vs = map (Just . fromDBid . round) <$> predict m vs
59 ------------------------------------------------------------------------
60 data Model = ModelSVM { modelSVM :: SVM.Model
61 , param1 :: Maybe Double
62 , param2 :: Maybe Double
65 instance SaveFile Model
67 saveFile' fp (ModelSVM m _ _) = SVM.saveModel m fp
69 instance ReadFile Model
73 pure $ ModelSVM m Nothing Nothing
75 ------------------------------------------------------------------------
78 -- split list : train / test
79 -- grid parameters on best result on test
81 type Train = Map ListType [Vec.Vector Double]
82 type Tests = Map ListType [Vec.Vector Double]
86 grid :: (MonadBase IO m)
87 => Param -> Param -> Train -> [Tests] -> m (Maybe Model)
88 grid _ _ _ [] = panic "Gargantext.Core.Text.List.Learn.grid : empty test data"
91 grid' :: (MonadBase IO m)
96 grid' x y tr' te' = do
97 model'' <- liftBase $ trainList x y tr'
100 model' = ModelSVM model'' (Just x) (Just y)
102 score' :: [(ListType, Maybe ListType)] -> Map (Maybe Bool) Int
103 score' = occurrencesWith (\(a,b) -> (==) <$> Just a <*> b)
105 score'' :: Map (Maybe Bool) Int -> Double
106 score'' m'' = maybe 0 (\t -> (fromIntegral t)/total) (Map.lookup (Just True) m'')
108 total = fromIntegral $ foldl (+) 0 $ Map.elems m''
111 let (res, toGuess) = List.unzip
113 $ map (\(k,vs) -> zip (repeat k) vs)
116 res' <- liftBase $ predictList m toGuess
117 pure $ score'' $ score' $ List.zip res res'
119 score <- mapM (getScore model') te'
120 pure (mean score, model')
122 r <- head . List.reverse
124 <$> mapM (\(x,y) -> grid' x y tr te)
125 [(x,y) | x <- [s..e], y <- [s..e]]
127 -- printDebug "GRID SEARCH" (map fst r)
128 -- printDebug "file" fp
129 --fp <- saveFile (ModelSVM model')