]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/List.hs
[INDEXING] WIP
[gargantext.git] / src / Gargantext / API / Ngrams / List.hs
1 {-|
2 Module : Gargantext.API.Ngrams.List
3 Description : Get Ngrams (lists)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE TypeOperators #-}
14
15 module Gargantext.API.Ngrams.List
16 where
17
18 import Data.Maybe (catMaybes)
19 import Control.Lens hiding (elements)
20 import Data.Aeson
21 import Data.Map (toList, fromList)
22 import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
23 import Data.Text (Text, concat, pack)
24 import GHC.Generics (Generic)
25 import Gargantext.API.Admin.Orchestrator.Types
26 import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams, NgramsTerm)
27 import Gargantext.API.Ngrams.Types (RepoCmdM, Versioned(..), NgramsList, NgramsTerm(..))
28 import Gargantext.API.Node.Corpus.New.File (FileType(..))
29 import Gargantext.API.Prelude (GargServer, GargNoServer)
30 import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
31 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
32 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
33 import Gargantext.Database.Schema.Node
34 import Gargantext.Database.Admin.Types.Node
35 import Gargantext.Database.Admin.Types.Hyperdata.Document
36 import Gargantext.Database.Schema.Ngrams (ngramsTypes, NgramsType(..))
37 import Gargantext.Database.Query.Table.Node (getDocumentsWithParentId)
38 import Gargantext.Prelude
39 import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
40 import Network.HTTP.Media ((//), (/:))
41 import Servant
42 import Servant.Job.Async
43 import Servant.Job.Utils (jsonOptions)
44 import Web.FormUrlEncoded (FromForm)
45 import qualified Data.HashMap.Strict as HashMap
46 import qualified Data.Text as Text
47
48 ------------------------------------------------------------------------
49 type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
50 -- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
51 :<|> PostAPI
52
53 api :: ListId -> GargServer API
54 api l = get l :<|> postAsync l
55
56 data HTML
57 instance Accept HTML where
58 contentType _ = "text" // "html" /: ("charset", "utf-8")
59 instance ToJSON a => MimeRender HTML a where
60 mimeRender _ = encode
61
62 ------------------------------------------------------------------------
63 get :: RepoCmdM env err m =>
64 ListId -> m (Headers '[Header "Content-Disposition" Text] NgramsList)
65 get lId = do
66 lst <- get' lId
67 let (NodeId id) = lId
68 return $ addHeader (concat [ "attachment; filename=GarganText_NgramsList-"
69 , pack $ show id
70 , ".json"
71 ]
72 ) lst
73
74 get' :: RepoCmdM env err m
75 => ListId -> m NgramsList
76 get' lId = fromList
77 <$> zip ngramsTypes
78 <$> mapM (getNgramsTableMap lId) ngramsTypes
79
80 ------------------------------------------------------------------------
81 -- TODO : purge list
82 -- TODO talk
83 post :: FlowCmdM env err m
84 => ListId
85 -> NgramsList
86 -> m Bool
87 post l m = do
88 -- TODO check with Version for optim
89 _ <- mapM (\(nt, Versioned _v ns) -> setListNgrams l nt ns) $ toList m
90 -- TODO reindex
91 pure True
92
93
94 -----------------------------------------------------------------------------
95 -- | Re-index documents of a corpus with new ngrams (called orphans here)
96 reIndexWith :: CorpusId
97 -> ListId
98 -> NgramsType
99 -> [NgramsTerm]
100 -> GargNoServer ()
101 reIndexWith cId lId nt ts = do
102 docs <- getDocumentsWithParentId cId
103
104 -- Taking the ngrams with 0 occurrences only (orphans)
105 orphans <- map (\k -> ([unNgramsTerm k], []))
106 <$> HashMap.keys
107 <$> HashMap.filter (==0)
108 <$> getOccByNgramsOnlyFast' cId lId nt ts
109
110 -- Checking Text documents where orphans match
111 let
112 docMatched =
113 map (\doc -> ( doc ^. node_id
114 , termsInText (buildPatterns orphans)
115 ( Text.unlines
116 $ catMaybes
117 [ doc ^. node_hyperdata . hd_title
118 , doc ^. node_hyperdata . hd_abstract
119 ]
120 )
121 )
122 ) docs
123
124 -- Saving the indexation in database
125
126 pure ()
127
128 ------------------------------------------------------------------------
129 ------------------------------------------------------------------------
130 type PostAPI = Summary "Update List"
131 :> "add"
132 :> "form"
133 :> "async"
134 :> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog
135
136 postAsync :: ListId -> GargServer PostAPI
137 postAsync lId =
138 serveJobsAPI $
139 JobFunction (\f log' -> postAsync' lId f (liftBase . log'))
140
141 postAsync' :: FlowCmdM env err m
142 => ListId
143 -> WithFile
144 -> (JobLog -> m ())
145 -> m JobLog
146 postAsync' l (WithFile _ m _) logStatus = do
147
148 logStatus JobLog { _scst_succeeded = Just 0
149 , _scst_failed = Just 0
150 , _scst_remaining = Just 1
151 , _scst_events = Just []
152 }
153 _r <- post l m
154
155 pure JobLog { _scst_succeeded = Just 1
156 , _scst_failed = Just 0
157 , _scst_remaining = Just 0
158 , _scst_events = Just []
159 }
160
161 data WithFile = WithFile
162 { _wf_filetype :: !FileType
163 , _wf_data :: !NgramsList
164 , _wf_name :: !Text
165 } deriving (Eq, Show, Generic)
166
167 makeLenses ''WithFile
168 instance FromForm WithFile
169 instance FromJSON WithFile where
170 parseJSON = genericParseJSON $ jsonOptions "_wf_"
171 instance ToJSON WithFile where
172 toJSON = genericToJSON $ jsonOptions "_wf_"
173 instance ToSchema WithFile where
174 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")