2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
16 module Gargantext.API.Node.DocumentsFromWriteNodes
20 import Control.Lens ((^.))
22 import Data.Either (Either(..), rights)
24 import qualified Data.Text as T
25 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
26 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
27 import Gargantext.API.Admin.Types (HasSettings)
28 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
29 import Gargantext.API.Prelude (GargM, GargError)
30 import Gargantext.Core (Lang(..))
31 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
32 import Gargantext.Core.Text.Terms (TermType(..))
33 import Gargantext.Core.Types.Individu (User(..))
34 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
35 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
36 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
37 import Gargantext.Database.Admin.Types.Hyperdata.Frame
38 import Gargantext.Database.Admin.Types.Node
39 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
40 import Gargantext.Database.Schema.Node (node_hyperdata)
41 import qualified Gargantext.Defaults as Defaults
42 import Gargantext.Prelude
43 import Gargantext.Utils.Jobs (serveJobsAPI)
44 import GHC.Generics (Generic)
47 ------------------------------------------------------------------------
48 type API = Summary " Documents from Write nodes."
49 :> AsyncJobs JobLog '[JSON] Params JobLog
50 ------------------------------------------------------------------------
51 newtype Params = Params { id :: Int }
52 deriving (Generic, Show)
53 instance FromJSON Params where
54 parseJSON = genericParseJSON defaultOptions
55 instance ToJSON Params where
56 toJSON = genericToJSON defaultOptions
57 instance ToSchema Params
58 ------------------------------------------------------------------------
59 api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
61 serveJobsAPI DocumentFromWriteNodeJob $ \p log'' ->
65 in documentsFromWriteNodes uId nId p (liftBase . log')
67 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
73 documentsFromWriteNodes uId nId _p logStatus = do
74 let jobLog = JobLog { _scst_succeeded = Just 1
75 , _scst_failed = Just 0
76 , _scst_remaining = Just 1
77 , _scst_events = Just []
81 mcId <- getClosestParentIdByType' nId NodeCorpus
85 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
86 logStatus $ jobLogFailTotalWithMessage msg jobLog
89 frameWriteIds <- getChildrenByType nId NodeFrameWrite
91 -- https://write.frame.gargantext.org/<frame_id>/download
92 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
94 frameWritesWithContents <- liftBase $
96 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
100 let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
101 let parsed = rights parsedE
103 _ <- flowDataText (RootId (NodeId uId)) (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed)) (Multi EN) cId Nothing logStatus
105 pure $ jobLogSuccess jobLog
106 ------------------------------------------------------------------------
107 hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
108 hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
109 case parseLines contents of
110 Left _ -> Left "Error parsing node"
111 Right (Parsed { authors, contents = c, date, source, title = t }) ->
112 let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
113 authors' = T.concat $ authorJoinSingle <$> authors
114 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
115 , T.pack $ show month, "-"
116 , T.pack $ show day ]) <$> date
117 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
118 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
119 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date in
120 Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
123 , _hd_uniqId = Nothing
124 , _hd_uniqIdBdd = Nothing
127 , _hd_authors = Just authors'
128 , _hd_institutes = Nothing
129 , _hd_source = source
130 , _hd_abstract = Just c
131 , _hd_publication_date = date'
132 , _hd_publication_year = Just year'
133 , _hd_publication_month = Just month'
134 , _hd_publication_day = Just day'
135 , _hd_publication_hour = Nothing
136 , _hd_publication_minute = Nothing
137 , _hd_publication_second = Nothing
138 , _hd_language_iso2 = Just $ T.pack $ show EN }