2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
16 module Gargantext.API.Node.DocumentsFromWriteNodes
19 -- import Data.Maybe (fromMaybe)
21 import Control.Lens ((^.))
23 import Data.Either (Either(..), rights)
24 import Data.Maybe (fromMaybe)
26 import Data.Text (Text)
27 import GHC.Generics (Generic)
28 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
29 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
30 import Gargantext.API.Admin.Types (HasSettings)
31 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
32 import Gargantext.API.Prelude (GargM, GargError)
33 import Gargantext.Core (Lang(..))
34 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
35 import Gargantext.Core.Text.List.Social (FlowSocialListWith)
36 import Gargantext.Core.Text.Terms (TermType(..))
37 import Gargantext.Core.Types.Individu (User(..))
38 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
39 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
40 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
41 import Gargantext.Database.Admin.Types.Hyperdata.Frame
42 import Gargantext.Database.Admin.Types.Node
43 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
44 import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
45 import Gargantext.Prelude
46 import Gargantext.Utils.Jobs (serveJobsAPI)
47 import Gargantext.Core.Text.Corpus.Parsers.Date (split')
49 import Text.Read (readMaybe)
50 import qualified Data.List as List
51 import qualified Data.Text as T
52 -- import qualified Gargantext.Defaults as Defaults
54 ------------------------------------------------------------------------
55 type API = Summary " Documents from Write nodes."
56 :> AsyncJobs JobLog '[JSON] Params JobLog
57 ------------------------------------------------------------------------
62 , selection :: FlowSocialListWith
64 deriving (Generic, Show)
65 instance FromJSON Params where
66 parseJSON = genericParseJSON defaultOptions
67 instance ToJSON Params where
68 toJSON = genericToJSON defaultOptions
69 instance ToSchema Params
70 ------------------------------------------------------------------------
71 api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
73 serveJobsAPI DocumentFromWriteNodeJob $ \p log'' ->
77 in documentsFromWriteNodes uId nId p (liftBase . log')
79 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
85 documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } logStatus = do
86 let jobLog = JobLog { _scst_succeeded = Just 1
87 , _scst_failed = Just 0
88 , _scst_remaining = Just 1
89 , _scst_events = Just []
93 mcId <- getClosestParentIdByType' nId NodeCorpus
97 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
98 logStatus $ jobLogFailTotalWithMessage msg jobLog
101 frameWriteIds <- getChildrenByType nId NodeFrameWrite
103 -- https://write.frame.gargantext.org/<frame_id>/download
104 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
106 frameWritesWithContents <- liftBase $
108 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
109 pure (node, contents)
112 let paragraphs' = readMaybe $ T.unpack paragraphs :: Maybe Int
114 let parsedE = (\(node, contents)
115 -> hyperdataDocumentFromFrameWrite lang (fromMaybe 7 paragraphs') (node, contents)) <$> frameWritesWithContents
116 let parsed = List.concat $ rights parsedE
118 _ <- flowDataText (RootId (NodeId uId))
119 (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
125 pure $ jobLogSuccess jobLog
127 ------------------------------------------------------------------------
128 hyperdataDocumentFromFrameWrite :: Lang -> Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
129 hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
130 case parseLines contents of
131 Left _ -> Left "Error parsing node"
132 Right (Parsed { authors, contents = ctxts}) ->
133 let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
134 authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
135 authors' = T.concat $ authorJoinSingle <$> authors
138 (year',month',day') = split' (node^. node_date)
139 date' = Just $ T.concat [ T.pack $ show year', "-"
140 , T.pack $ show month', "-"
146 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year', "-"
147 , T.pack $ show month', "-"
148 , T.pack $ show day' ]) <$> date
149 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
150 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
151 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date
154 Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
157 , _hd_uniqId = Nothing
158 , _hd_uniqIdBdd = Nothing
161 , _hd_authors = Just authors'
162 , _hd_institutes = Nothing
163 , _hd_source = Just $ node ^. node_name
164 , _hd_abstract = Just ctxt
165 , _hd_publication_date = date'
166 , _hd_publication_year = Just year'
167 , _hd_publication_month = Just month'
168 , _hd_publication_day = Just day'
169 , _hd_publication_hour = Nothing
170 , _hd_publication_minute = Nothing
171 , _hd_publication_second = Nothing
172 , _hd_language_iso2 = Just $ T.pack $ show lang }
173 ) (text2titleParagraphs paragraphSize ctxts)