]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
[Node write upload] Add params
[gargantext.git] / src / Gargantext / API / Node / DocumentsFromWriteNodes.hs
1 {-|
2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
15
16 module Gargantext.API.Node.DocumentsFromWriteNodes
17 where
18
19 import Conduit
20 import Control.Lens ((^.))
21 import Data.Aeson
22 import Data.Either (Either(..), rights)
23 import Data.Swagger
24 import qualified Data.Text as T
25 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
26 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
27 import Gargantext.API.Admin.Types (HasSettings)
28 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
29 import Gargantext.API.Prelude (GargM, GargError)
30 import Gargantext.Core (Lang(..))
31 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
32 import Gargantext.Core.Text.Terms (TermType(..))
33 import Gargantext.Core.Types.Individu (User(..))
34 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
35 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
36 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
37 import Gargantext.Database.Admin.Types.Hyperdata.Frame
38 import Gargantext.Database.Admin.Types.Node
39 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
40 import Gargantext.Database.Schema.Node (node_hyperdata)
41 import qualified Data.List as List
42 import qualified Gargantext.Defaults as Defaults
43 import Gargantext.Prelude
44 import Gargantext.Utils.Jobs (serveJobsAPI)
45 import GHC.Generics (Generic)
46 import Servant
47 import Data.Text (Text)
48 import Gargantext.Core.Text.List.Social (FlowSocialListWith)
49 import Text.Read (readMaybe)
50 import Data.Maybe (fromMaybe)
51
52 ------------------------------------------------------------------------
53 type API = Summary " Documents from Write nodes."
54 :> AsyncJobs JobLog '[JSON] Params JobLog
55 ------------------------------------------------------------------------
56 data Params = Params
57 { id :: Int
58 , paragraphs :: Text
59 , lang :: Lang
60 , selection :: FlowSocialListWith
61 }
62 deriving (Generic, Show)
63 instance FromJSON Params where
64 parseJSON = genericParseJSON defaultOptions
65 instance ToJSON Params where
66 toJSON = genericToJSON defaultOptions
67 instance ToSchema Params
68 ------------------------------------------------------------------------
69 api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
70 api uId nId =
71 serveJobsAPI DocumentFromWriteNodeJob $ \p log'' ->
72 let
73 log' x = do
74 liftBase $ log'' x
75 in documentsFromWriteNodes uId nId p (liftBase . log')
76
77 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
78 => UserId
79 -> NodeId
80 -> Params
81 -> (JobLog -> m ())
82 -> m JobLog
83 documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } logStatus = do
84 let jobLog = JobLog { _scst_succeeded = Just 1
85 , _scst_failed = Just 0
86 , _scst_remaining = Just 1
87 , _scst_events = Just []
88 }
89 logStatus jobLog
90
91 mcId <- getClosestParentIdByType' nId NodeCorpus
92 cId <- case mcId of
93 Just cId -> pure cId
94 Nothing -> do
95 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
96 logStatus $ jobLogFailTotalWithMessage msg jobLog
97 panic msg
98
99 frameWriteIds <- getChildrenByType nId NodeFrameWrite
100
101 -- https://write.frame.gargantext.org/<frame_id>/download
102 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
103
104 frameWritesWithContents <- liftBase $
105 mapM (\node -> do
106 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
107 pure (node, contents)
108 ) frameWrites
109
110 let paragraphs' = readMaybe $ T.unpack paragraphs :: Maybe Int
111
112 let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (fromMaybe 7 paragraphs') (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
113 -- TODO hard coded param should be take
114 let parsed = List.concat $ rights parsedE
115
116 _ <- flowDataText (RootId (NodeId uId)) (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed)) (Multi lang) cId (Just selection) logStatus
117
118 pure $ jobLogSuccess jobLog
119 ------------------------------------------------------------------------
120 hyperdataDocumentFromFrameWrite :: Int -> (HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
121 hyperdataDocumentFromFrameWrite paragraphSize (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
122 case parseLines contents of
123 Left _ -> Left "Error parsing node"
124 Right (Parsed { authors, contents = ctxts, date, source, title = t }) ->
125 let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
126 authors' = T.concat $ authorJoinSingle <$> authors
127 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
128 , T.pack $ show month, "-"
129 , T.pack $ show day ]) <$> date
130 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
131 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
132 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date in
133 Right (List.map (\ctxt -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
134 , _hd_doi = Nothing
135 , _hd_url = Nothing
136 , _hd_uniqId = Nothing
137 , _hd_uniqIdBdd = Nothing
138 , _hd_page = Nothing
139 , _hd_title = Just t
140 , _hd_authors = Just authors'
141 , _hd_institutes = Nothing
142 , _hd_source = source
143 , _hd_abstract = Just ctxt
144 , _hd_publication_date = date'
145 , _hd_publication_year = Just year'
146 , _hd_publication_month = Just month'
147 , _hd_publication_day = Just day'
148 , _hd_publication_hour = Nothing
149 , _hd_publication_minute = Nothing
150 , _hd_publication_second = Nothing
151 , _hd_language_iso2 = Just $ T.pack $ show EN }
152 ) (text2paragraphs paragraphSize ctxts)
153 )