2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
15 {-# OPTIONS_GHC -fno-warn-orphans #-}
17 module Gargantext.API.Node.DocumentsFromWriteNodes
20 import Control.Lens ((^.))
22 import Data.Either (Either(..), rights)
24 import qualified Data.Text as T
25 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
26 import Gargantext.API.Admin.Types (HasSettings)
27 import Gargantext.API.Prelude (GargServer)
28 import Gargantext.Core (Lang(..))
29 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
30 import Gargantext.Core.Text.Terms (TermType(..))
31 import Gargantext.Core.Types.Individu (User(..))
32 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
33 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
34 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
35 import Gargantext.Database.Admin.Types.Hyperdata.Frame
36 import Gargantext.Database.Admin.Types.Node
37 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
38 import Gargantext.Database.Schema.Node (node_hyperdata)
39 import Gargantext.Prelude
40 import GHC.Generics (Generic)
42 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
44 ------------------------------------------------------------------------
45 type API = Summary " Documents from Write nodes."
46 :> AsyncJobs JobLog '[JSON] Params JobLog
47 ------------------------------------------------------------------------
48 newtype Params = Params { id :: Int }
49 deriving (Generic, Show)
51 instance FromJSON Params where
52 parseJSON = genericParseJSON defaultOptions
53 instance ToJSON Params where
54 toJSON = genericToJSON defaultOptions
55 instance ToSchema Params
56 ------------------------------------------------------------------------
57 api :: UserId -> NodeId -> GargServer API
60 JobFunction (\p log'' ->
64 in documentsFromWriteNodes uId nId p (liftBase . log')
67 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
73 documentsFromWriteNodes uId nId _p logStatus = do
75 logStatus JobLog { _scst_succeeded = Just 1
76 , _scst_failed = Just 0
77 , _scst_remaining = Just 1
78 , _scst_events = Just []
81 mcId <- getClosestParentIdByType' nId NodeCorpus
82 let cId = maybe (panic "[G.A.N.DFWN] Node has no parent") identity mcId
84 frameWriteIds <- getChildrenByType nId NodeFrameWrite
86 -- https://write.frame.gargantext.org/<frame_id>/download
87 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
89 frameWritesWithContents <- liftBase $
91 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
95 let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
96 let parsed = rights parsedE
98 _ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing
100 pure JobLog { _scst_succeeded = Just 2
101 , _scst_failed = Just 0
102 , _scst_remaining = Just 0
103 , _scst_events = Just []
105 ------------------------------------------------------------------------
106 hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
107 hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
108 case parseLines contents of
109 Left _ -> Left "Error parsing node"
110 Right (Parsed { authors, contents = c, date, source, title = t }) ->
111 let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ] in
112 let authors' = T.concat $ authorJoinSingle <$> authors in
113 Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
116 , _hd_uniqId = Nothing
117 , _hd_uniqIdBdd = Nothing
120 , _hd_authors = Just authors'
121 , _hd_institutes = Nothing
122 , _hd_source = source
123 , _hd_abstract = Just c
124 , _hd_publication_date = date
125 , _hd_publication_year = Nothing -- TODO
126 , _hd_publication_month = Nothing -- TODO
127 , _hd_publication_day = Nothing -- TODO
128 , _hd_publication_hour = Nothing
129 , _hd_publication_minute = Nothing
130 , _hd_publication_second = Nothing
131 , _hd_language_iso2 = Just $ T.pack $ show EN }