{-|
Module      : Gargantext.API.Node.DocumentsFromWriteNodes
Description : Build corpus documents from the Write (frame) child nodes of a node
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}
{-# LANGUAGE MonoLocalBinds  #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators   #-}
{-# LANGUAGE NamedFieldPuns  #-}
module Gargantext.API.Node.DocumentsFromWriteNodes
  where
-- import Data.Maybe (fromMaybe)
import Conduit (yieldMany)
import Control.Lens ((^.))
import Data.Aeson
import Data.Either (Either(..), rights)
import Data.Swagger (ToSchema)
import GHC.Generics (Generic)
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
import Gargantext.API.Prelude (GargM, GargError)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
import Gargantext.Core.Text.List.Social (FlowSocialListWith)
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI)
import Gargantext.Core.Text.Corpus.Parsers.Date (split')
import Servant
import qualified Data.List as List
import qualified Data.Text as T
-- import qualified Gargantext.Defaults as Defaults
------------------------------------------------------------------------
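-- | Asynchronous endpoint: the client posts a 'Params' payload (JSON) and then
--   polls the resulting job, which reports its progress as 'JobLog's.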
type API = Summary " Documents from Write nodes."
         :> AsyncJobs JobLog '[JSON] Params JobLog
------------------------------------------------------------------------
data Params = Params
  { paragraphs :: Int
  , lang       :: Lang
  , selection  :: FlowSocialListWith
  }
  deriving (Generic, Show)
instance FromJSON Params where
  parseJSON = genericParseJSON defaultOptions
instance ToJSON Params where
  toJSON = genericToJSON defaultOptions
instance ToSchema Params
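-- A request body then looks roughly like the following (illustrative values
-- only, assuming the generic Aeson encoding of the fields above; the exact
-- shape of "selection" is determined by 'FlowSocialListWith'):
--
--   { "paragraphs": 7
--   , "lang": "EN"
--   , "selection": { ... }
--   }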
------------------------------------------------------------------------
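-- | Server handler: run 'documentsFromWriteNodes' as a background job
--   (registered as 'DocumentFromWriteNodeJob') and forward its progress to the
--   job logger.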
api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
api uId nId =
  serveJobsAPI DocumentFromWriteNodeJob $ \p log'' ->
      let
        log' x = do
          printDebug "documentsFromWriteNodes" x
          log'' x
      in documentsFromWriteNodes uId nId p (liftBase . log')
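-- | The job itself: find the corpus that is the closest parent of the node,
--   fetch the contents of its children of type 'NodeFrameWrite', parse them
--   into 'HyperdataDocument's and flow them into that corpus.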
documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
                        => UserId
                        -> NodeId
                        -> Params
                        -> (JobLog -> m ())
                        -> m JobLog
documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } logStatus = do
  let jobLog = JobLog { _scst_succeeded = Just 1
                      , _scst_failed    = Just 0
                      , _scst_remaining = Just 1
                      , _scst_events    = Just []
                      }
  mcId <- getClosestParentIdByType' nId NodeCorpus
  cId <- case mcId of
    Just cId -> pure cId
    Nothing -> do
      let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
      logStatus $ jobLogFailTotalWithMessage msg jobLog
      panic msg
  frameWriteIds <- getChildrenByType nId NodeFrameWrite

  -- https://write.frame.gargantext.org/<frame_id>/download
  frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
  frameWritesWithContents <- liftBase $
    mapM (\node -> do
             contents <- getHyperdataFrameContents (node ^. node_hyperdata)
             pure (node, contents)
         ) frameWrites
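  -- Parse each (node, contents) pair into documents; frames whose contents
  -- cannot be parsed are dropped ('rights' keeps the successful parses only).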
  let parsedE = (\(node, contents)
                   -> hyperdataDocumentFromFrameWrite lang paragraphs (node, contents)) <$> frameWritesWithContents
  let parsed  = List.concat $ rights parsedE
  printDebug "DocumentsFromWriteNodes: uId" uId
  _ <- flowDataText (RootId (NodeId uId))
                    (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
                    (Multi lang)
                    cId
                    (Just selection)
                    logStatus

  pure $ jobLogSuccess jobLog
------------------------------------------------------------------------
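-- | Turn the raw text of one frame-write node into a list of
--   'HyperdataDocument's: 'text2titleParagraphs' cuts the parsed contents into
--   title/paragraph chunks of size @paragraphSize@, and each chunk becomes one
--   document carrying the node's authors, name and date.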
hyperdataDocumentFromFrameWrite :: Lang -> Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
  case parseLines contents of
    Left _ -> Left "Error parsing node"
    Right (Parsed { authors, contents = ctxts }) ->
      let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
          authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
          authors' = T.concat $ authorJoinSingle <$> authors

          (year', month', day') = split' (node ^. node_date)
          date' = Just $ T.concat [ T.pack $ show year', "-"
                                  , T.pack $ show month', "-"
                                  , T.pack $ show day' ]
          {-
          date'  = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
                                                             , T.pack $ show month, "-"
                                                             , T.pack $ show day ]) <$> date
          year'  = fromIntegral $ maybe Defaults.year  (\(Date { year })  -> year) date
          month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
          day'   = maybe Defaults.day   (\(Date { day })   -> fromIntegral day) date
          -}
      in
        Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
                                                         , _hd_doi = Nothing
                                                         , _hd_url = Nothing
                                                         , _hd_uniqId = Nothing
                                                         , _hd_uniqIdBdd = Nothing
                                                         , _hd_page = Nothing
                                                         , _hd_title = Just t
                                                         , _hd_authors = Just authors'
                                                         , _hd_institutes = Nothing
                                                         , _hd_source = Just $ node ^. node_name
                                                         , _hd_abstract = Just ctxt
                                                         , _hd_publication_date = date'
                                                         , _hd_publication_year = Just year'
                                                         , _hd_publication_month = Just month'
                                                         , _hd_publication_day = Just day'
                                                         , _hd_publication_hour = Nothing
                                                         , _hd_publication_minute = Nothing
                                                         , _hd_publication_second = Nothing
                                                         , _hd_language_iso2 = Just $ T.pack $ show lang }
                        ) (text2titleParagraphs paragraphSize ctxts))