]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
Merge branch 'dev' into dev-corpora-from-write-nodes
[gargantext.git] / src / Gargantext / API / Node / DocumentsFromWriteNodes.hs
1 {-|
2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
15 {-# OPTIONS_GHC -fno-warn-orphans #-}
16
17 module Gargantext.API.Node.DocumentsFromWriteNodes
18 where
19
20 import Control.Lens ((^.))
21 import Data.Aeson
22 import Data.Either (Either(..), rights)
23 import Data.Swagger
24 import qualified Data.Text as T
25 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
26 import Gargantext.API.Admin.Types (HasSettings)
27 import Gargantext.API.Prelude (GargServer)
28 import Gargantext.Core (Lang(..))
29 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
30 import Gargantext.Core.Text.Terms (TermType(..))
31 import Gargantext.Core.Types.Individu (User(..))
32 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
33 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
34 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
35 import Gargantext.Database.Admin.Types.Hyperdata.Frame
36 import Gargantext.Database.Admin.Types.Node
37 import Gargantext.Database.Query.Table.Node (getChildrenByType, getNodeWith)
38 import Gargantext.Database.Schema.Node (node_hyperdata)
39 import Gargantext.Prelude
40 import GHC.Generics (Generic)
41 import Servant
42 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
43
44 ------------------------------------------------------------------------
45 type API = Summary " Documents from Write nodes."
46 :> AsyncJobs JobLog '[JSON] Params JobLog
47 ------------------------------------------------------------------------
48 newtype Params = Params { id :: Int }
49 deriving (Generic, Show)
50
51 instance FromJSON Params where
52 parseJSON = genericParseJSON defaultOptions
53 instance ToJSON Params where
54 toJSON = genericToJSON defaultOptions
55 instance ToSchema Params
56 ------------------------------------------------------------------------
57 api :: UserId -> NodeId -> GargServer API
58 api uId nId =
59 serveJobsAPI $
60 JobFunction (\p log'' ->
61 let
62 log' x = do
63 printDebug "documents from write nodes" x
64 liftBase $ log'' x
65 in documentsFromWriteNodes uId nId p (liftBase . log')
66 )
67
68 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
69 => UserId
70 -> NodeId
71 -> Params
72 -> (JobLog -> m ())
73 -> m JobLog
74 documentsFromWriteNodes uId nId p logStatus = do
75
76 logStatus JobLog { _scst_succeeded = Just 1
77 , _scst_failed = Just 0
78 , _scst_remaining = Just 1
79 , _scst_events = Just []
80 }
81
82 _ <- printDebug "[documentsFromWriteNodes] inside job, uId" uId
83 _ <- printDebug "[documentsFromWriteNodes] inside job, nId" nId
84 _ <- printDebug "[documentsFromWriteNodes] inside job, p" p
85
86 frameWriteIds <- getChildrenByType nId NodeFrameWrite
87 _ <- printDebug "[documentsFromWriteNodes] children" frameWriteIds
88
89 -- https://write.frame.gargantext.org/<frame_id>/download
90 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
91
92 frameWritesWithContents <- liftBase $
93 mapM (\node -> do
94 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
95 pure (node, contents)
96 ) frameWrites
97 _ <- printDebug "[documentsFromWriteNodes] frameWritesWithContents" frameWritesWithContents
98
99 let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
100 let parsed = rights parsedE
101 _ <- printDebug "[documentsFromWriteNodes] parsed" parsed
102
103 _ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) nId Nothing
104
105 pure JobLog { _scst_succeeded = Just 2
106 , _scst_failed = Just 0
107 , _scst_remaining = Just 0
108 , _scst_events = Just []
109 }
110 ------------------------------------------------------------------------
111 hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
112 hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
113 case parseLines contents of
114 Left _ -> Left "Error parsing node"
115 Right (Parsed { authors, contents = c, date, source, title = t }) ->
116 let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ] in
117 let authors' = T.concat $ authorJoinSingle <$> authors in
118 Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
119 , _hd_doi = Nothing
120 , _hd_url = Nothing
121 , _hd_uniqId = Nothing
122 , _hd_uniqIdBdd = Nothing
123 , _hd_page = Nothing
124 , _hd_title = Just t
125 , _hd_authors = Just authors'
126 , _hd_institutes = Nothing
127 , _hd_source = source
128 , _hd_abstract = Just c
129 , _hd_publication_date = date
130 , _hd_publication_year = Nothing -- TODO
131 , _hd_publication_month = Nothing -- TODO
132 , _hd_publication_day = Nothing -- TODO
133 , _hd_publication_hour = Nothing
134 , _hd_publication_minute = Nothing
135 , _hd_publication_second = Nothing
136 , _hd_language_iso2 = Just $ T.pack $ show EN }