2 Module : Gargantext.Text.List.Learn
3 Description : Learn to make lists
4 Copyright : (c) CNRS, 2018-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 CSV parser for Gargantext corpus files.
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
16 {-# LANGUAGE FlexibleContexts #-}
17 {-# LANGUAGE NoImplicitPrelude #-}
18 {-# LANGUAGE OverloadedStrings #-}
20 module Gargantext.Text.List.Learn
23 import Control.Monad.Reader (MonadReader)
24 -- TODO remvoe this deps
25 import Gargantext.API.Admin.Settings
27 import Data.Maybe (maybe)
28 import Gargantext.Core.Types.Main (ListType(..), listTypeId, fromListTypeId)
29 import Gargantext.Prelude
30 import Gargantext.Prelude.Utils
31 import Gargantext.Text.Metrics.Count (occurrencesWith)
32 import qualified Data.IntMap as IntMap
33 import qualified Data.List as List
34 import qualified Data.Map as Map
35 import qualified Data.SVM as SVM
36 import qualified Data.Vector as Vec
38 ------------------------------------------------------------------------
39 train :: Double -> Double -> SVM.Problem -> IO SVM.Model
40 train x y = (SVM.train (SVM.CSvc x) (SVM.RBF y))
42 predict :: SVM.Model -> [Vec.Vector Double] -> IO [Double]
43 predict m vs = mapM (predict' m) vs
45 predict' m' vs' = SVM.predict m' (IntMap.fromList $ (zip [1..]) $ Vec.toList vs')
47 ------------------------------------------------------------------------
48 trainList :: Double -> Double -> Map ListType [Vec.Vector Double] -> IO SVM.Model
49 trainList x y = (train x y) . trainList'
51 trainList' :: Map ListType [Vec.Vector Double] -> SVM.Problem
52 trainList' = mapVec2problem . (Map.mapKeys (fromIntegral . listTypeId))
54 mapVec2problem :: Map Double [Vec.Vector Double] -> SVM.Problem
55 mapVec2problem = List.concat . (map (\(a,as) -> zip (repeat a) as)) . Map.toList . (Map.map vecs2maps)
57 vecs2maps :: [Vec.Vector Double] -> [IntMap.IntMap Double]
58 vecs2maps = map (IntMap.fromList . (zip [1..]) . Vec.toList)
61 predictList :: Model -> [Vec.Vector Double] -> IO [Maybe ListType]
62 predictList (ModelSVM m _ _) vs = map (fromListTypeId . round) <$> predict m vs
64 ------------------------------------------------------------------------
65 data Model = ModelSVM { modelSVM :: SVM.Model
66 , param1 :: Maybe Double
67 , param2 :: Maybe Double
70 instance SaveFile Model
72 saveFile' fp (ModelSVM m _ _) = SVM.saveModel m fp
74 instance ReadFile Model
78 pure $ ModelSVM m Nothing Nothing
80 ------------------------------------------------------------------------
83 -- split list : train / test
84 -- grid parameters on best result on test
86 type Train = Map ListType [Vec.Vector Double]
87 type Tests = Map ListType [Vec.Vector Double]
91 grid :: (MonadReader env m, MonadBase IO m, HasSettings env)
92 => Param -> Param -> Train -> [Tests] -> m (Maybe Model)
93 grid _ _ _ [] = panic "Gargantext.Text.List.Learn.grid : empty test data"
96 grid' :: (MonadReader env m, MonadBase IO m, HasSettings env)
101 grid' x y tr' te' = do
102 model'' <- liftBase $ trainList x y tr'
105 model' = ModelSVM model'' (Just x) (Just y)
107 score' :: [(ListType, Maybe ListType)] -> Map (Maybe Bool) Int
108 score' = occurrencesWith (\(a,b) -> (==) <$> Just a <*> b)
110 score'' :: Map (Maybe Bool) Int -> Double
111 score'' m'' = maybe 0 (\t -> (fromIntegral t)/total) (Map.lookup (Just True) m'')
113 total = fromIntegral $ foldl (+) 0 $ Map.elems m''
116 let (res, toGuess) = List.unzip
118 $ map (\(k,vs) -> zip (repeat k) vs)
121 res' <- liftBase $ predictList m toGuess
122 pure $ score'' $ score' $ List.zip res res'
124 score <- mapM (getScore model') te'
125 pure (mean score, model')
127 r <- head . List.reverse
129 <$> mapM (\(x,y) -> grid' x y tr te)
130 [(x,y) | x <- [s..e], y <- [s..e]]
132 printDebug "GRID SEARCH" (map fst r)
133 --printDebug "file" fp
134 --fp <- saveFile (ModelSVM model')