2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
16 module Gargantext.API.Node.DocumentsFromWriteNodes
19 -- import Data.Maybe (fromMaybe)
21 import Control.Lens ((^.))
23 import Data.Either (Either(..), rights)
24 import Data.Maybe (fromMaybe)
26 import Data.Text (Text)
27 import GHC.Generics (Generic)
28 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
29 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
30 import Gargantext.API.Admin.Types (HasSettings)
31 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
32 import Gargantext.API.Prelude (GargM, GargError)
33 import Gargantext.Core (Lang(..))
34 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
35 import Gargantext.Core.Text.List.Social (FlowSocialListWith)
36 import Gargantext.Core.Text.Terms (TermType(..))
37 import Gargantext.Core.Types.Individu (User(..))
38 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
39 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
40 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
41 import Gargantext.Database.Admin.Types.Hyperdata.Frame
42 import Gargantext.Database.Admin.Types.Node
43 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
44 import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
45 import Gargantext.Prelude
46 import Gargantext.Utils.Jobs (serveJobsAPI, jobHandleLogger)
47 import Gargantext.Core.Text.Corpus.Parsers.Date (split')
49 import Text.Read (readMaybe)
50 import qualified Data.List as List
51 import qualified Data.Text as T
52 -- import qualified Gargantext.Defaults as Defaults
54 ------------------------------------------------------------------------
55 type API = Summary " Documents from Write nodes."
56 :> AsyncJobs JobLog '[JSON] Params JobLog
57 ------------------------------------------------------------------------
62 , selection :: FlowSocialListWith
64 deriving (Generic, Show)
65 instance FromJSON Params where
66 parseJSON = genericParseJSON defaultOptions
67 instance ToJSON Params where
68 toJSON = genericToJSON defaultOptions
69 instance ToSchema Params
70 ------------------------------------------------------------------------
71 api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
73 serveJobsAPI DocumentFromWriteNodeJob $ \jHandle p ->
74 documentsFromWriteNodes uId nId p (jobHandleLogger jHandle)
76 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
82 documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } logStatus = do
83 let jobLog = JobLog { _scst_succeeded = Just 1
84 , _scst_failed = Just 0
85 , _scst_remaining = Just 1
86 , _scst_events = Just []
90 mcId <- getClosestParentIdByType' nId NodeCorpus
94 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
95 logStatus $ jobLogFailTotalWithMessage msg jobLog
98 frameWriteIds <- getChildrenByType nId NodeFrameWrite
100 -- https://write.frame.gargantext.org/<frame_id>/download
101 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
103 frameWritesWithContents <- liftBase $
105 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
106 pure (node, contents)
109 let paragraphs' = fromMaybe (7 :: Int) $ (readMaybe $ T.unpack paragraphs)
110 let parsedE = (\(node, contents)
111 -> hyperdataDocumentFromFrameWrite lang paragraphs' (node, contents)) <$> frameWritesWithContents
112 let parsed = List.concat $ rights parsedE
113 -- printDebug "DocumentsFromWriteNodes: uId" uId
114 _ <- flowDataText (RootId (NodeId uId))
115 (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
121 pure $ jobLogSuccess jobLog
123 ------------------------------------------------------------------------
124 hyperdataDocumentFromFrameWrite :: Lang -> Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
125 hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
126 case parseLines contents of
127 Left _ -> Left "Error parsing node"
128 Right (Parsed { authors, contents = ctxts}) ->
129 let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
130 authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
131 authors' = T.concat $ authorJoinSingle <$> authors
134 (year',month',day') = split' (node^. node_date)
135 date' = Just $ T.concat [ T.pack $ show year', "-"
136 , T.pack $ show month', "-"
142 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year', "-"
143 , T.pack $ show month', "-"
144 , T.pack $ show day' ]) <$> date
145 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
146 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
147 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date
150 Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
153 , _hd_uniqId = Nothing
154 , _hd_uniqIdBdd = Nothing
157 , _hd_authors = Just authors'
158 , _hd_institutes = Nothing
159 , _hd_source = Just $ node ^. node_name
160 , _hd_abstract = Just ctxt
161 , _hd_publication_date = date'
162 , _hd_publication_year = Just year'
163 , _hd_publication_month = Just month'
164 , _hd_publication_day = Just day'
165 , _hd_publication_hour = Nothing
166 , _hd_publication_minute = Nothing
167 , _hd_publication_second = Nothing
168 , _hd_language_iso2 = Just $ T.pack $ show lang }
169 ) (text2titleParagraphs paragraphSize ctxts)