{-|
Module      : Gargantext.API.Node.DocumentsFromWriteNodes
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
-}
{-# LANGUAGE MonoLocalBinds  #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators   #-}
module Gargantext.API.Node.DocumentsFromWriteNodes
  where
import Conduit (yieldMany)
import Control.Lens ((^.))
import Data.Aeson (FromJSON(..), ToJSON(..), defaultOptions, genericParseJSON, genericToJSON)
import Data.Either (Either(..), rights)
import Data.Maybe (fromMaybe)
import Data.Swagger (ToSchema)
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
import Gargantext.API.Prelude (GargM, GargError)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.Date (split')
import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
import Gargantext.Core.Text.List.Social (FlowSocialListWith)
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI)
import Servant
import Text.Read (readMaybe)
import qualified Data.List as List
import qualified Data.Text as T
-- import qualified Gargantext.Defaults as Defaults
------------------------------------------------------------------------
type API = Summary " Documents from Write nodes."
         :> AsyncJobs JobLog '[JSON] Params JobLog
------------------------------------------------------------------------
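-- | Parameters sent by the client for the "documents from Write nodes"
--   job: how many paragraphs make one document, the language used for
--   term extraction, and the social-list selection to apply.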
data Params = Params
    { paragraphs :: Text
    , lang       :: Lang
    , selection  :: FlowSocialListWith
    }
    deriving (Generic, Show)
instance FromJSON Params where
  parseJSON = genericParseJSON defaultOptions
instance ToJSON Params where
  toJSON = genericToJSON defaultOptions
instance ToSchema Params
------------------------------------------------------------------------
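-- | Expose 'documentsFromWriteNodes' as an asynchronous job, forwarding
--   every 'JobLog' update to the job-queue logger.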
api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
api uId nId =
  serveJobsAPI DocumentFromWriteNodeJob $ \p log'' ->
    let
      log' x = do
        printDebug "documentsFromWriteNodes" x
        log'' x
    in documentsFromWriteNodes uId nId p (liftBase . log')
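
-- | Build documents from the Write (frame) children of a node: find the
--   parent corpus, download each frame's contents, cut them into
--   paragraph-sized documents and flow them into the corpus.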
documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
                        => UserId
                        -> NodeId
                        -> Params
                        -> (JobLog -> m ())
                        -> m JobLog
documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } logStatus = do
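  -- Initial status: one step already succeeded (the job has started),
  -- one step (the actual import) still remaining.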
  let jobLog = JobLog { _scst_succeeded = Just 1
                      , _scst_failed    = Just 0
                      , _scst_remaining = Just 1
                      , _scst_events    = Just []
                      }
  logStatus jobLog

  mcId <- getClosestParentIdByType' nId NodeCorpus
  cId <- case mcId of
    Just cId -> pure cId
    Nothing  -> do
      let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
      logStatus $ jobLogFailTotalWithMessage msg jobLog
      panic msg
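
  -- Every child of type NodeFrameWrite is a Write (frame) node; its text is
  -- downloaded from the frames service.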
  frameWriteIds <- getChildrenByType nId NodeFrameWrite

  -- https://write.frame.gargantext.org/<frame_id>/download
  frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds

  frameWritesWithContents <- liftBase $
    mapM (\node -> do
            contents <- getHyperdataFrameContents (node ^. node_hyperdata)
            pure (node, contents)
         ) frameWrites
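
  -- Cut each frame's text into blocks of paragraphs' paragraphs (7 by
  -- default when the field cannot be parsed) and turn them into documents.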
  let paragraphs' = fromMaybe (7 :: Int) $ readMaybe (T.unpack paragraphs)
  let parsedE = (\(node, contents)
                  -> hyperdataDocumentFromFrameWrite lang paragraphs' (node, contents)) <$> frameWritesWithContents
  let parsed = List.concat $ rights parsedE
  -- printDebug "DocumentsFromWriteNodes: uId" uId
  _ <- flowDataText (RootId (NodeId uId))
                    (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
                    (Multi lang)
                    cId
                    (Just selection)
                    logStatus

  pure $ jobLogSuccess jobLog
------------------------------------------------------------------------
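-- | Parse the contents of one Write node and build one 'HyperdataDocument'
--   per (title, paragraph block) pair returned by 'text2titleParagraphs'.
--   Authors come from the parsed header, the node name is used as source
--   and the node date as publication date.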
hyperdataDocumentFromFrameWrite :: Lang -> Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
  case parseLines contents of
    Left _ -> Left "Error parsing node"
    Right (Parsed { authors, contents = ctxts }) ->
      let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
          authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
          authors' = T.concat $ authorJoinSingle <$> authors
          (year', month', day') = split' (node ^. node_date)
          date' = Just $ T.concat [ T.pack $ show year', "-"
                                  , T.pack $ show month', "-"
                                  , T.pack $ show day'
                                  ]
          {- Previous implementation, based on a parsed @date@ field and
             @Gargantext.Defaults@ (see the commented import above):
          date'  = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
                                                             , T.pack $ show month, "-"
                                                             , T.pack $ show day ]) <$> date
          year'  = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
          month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
          day'   = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date
          -}
      in
      Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
                                                       , _hd_doi = Nothing
                                                       , _hd_url = Nothing
                                                       , _hd_uniqId = Nothing
                                                       , _hd_uniqIdBdd = Nothing
                                                       , _hd_page = Nothing
                                                       , _hd_title = Just t
                                                       , _hd_authors = Just authors'
                                                       , _hd_institutes = Nothing
                                                       , _hd_source = Just $ node ^. node_name
                                                       , _hd_abstract = Just ctxt
                                                       , _hd_publication_date = date'
                                                       , _hd_publication_year = Just year'
                                                       , _hd_publication_month = Just month'
                                                       , _hd_publication_day = Just day'
                                                       , _hd_publication_hour = Nothing
                                                       , _hd_publication_minute = Nothing
                                                       , _hd_publication_second = Nothing
                                                       , _hd_language_iso2 = Just $ T.pack $ show lang }
                      ) (text2titleParagraphs paragraphSize ctxts)
             )