]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
Merge remote-tracking branch 'origin/dev-hackathon-fixes' into dev-merge
[gargantext.git] / src / Gargantext / API / Node / DocumentsFromWriteNodes.hs
1 {-|
2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
15
16 module Gargantext.API.Node.DocumentsFromWriteNodes
17 where
18
19 import Conduit
20 import Control.Lens ((^.))
21 import Data.Aeson
22 import Data.Either (Either(..), rights)
23 -- import Data.Maybe (fromMaybe)
24 import Data.Swagger
25 import GHC.Generics (Generic)
26 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
27 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
28 import Gargantext.API.Admin.Types (HasSettings)
29 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
30 import Gargantext.API.Prelude (GargM, GargError)
31 import Gargantext.Core (Lang(..))
32 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
33 import Gargantext.Core.Text.Terms (TermType(..))
34 import Gargantext.Core.Types.Individu (User(..))
35 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
36 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
37 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
38 import Gargantext.Database.Admin.Types.Hyperdata.Frame
39 import Gargantext.Database.Admin.Types.Node
40 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
41 import Gargantext.Database.Schema.Node (node_hyperdata, node_name)
42 import Gargantext.Prelude
43 import Gargantext.Utils.Jobs (serveJobsAPI)
44 import Servant
45 import qualified Data.List as List
46 import qualified Data.Text as T
47 import qualified Gargantext.Defaults as Defaults
48
49 ------------------------------------------------------------------------
50 type API = Summary " Documents from Write nodes."
51 :> AsyncJobs JobLog '[JSON] Params JobLog
52 ------------------------------------------------------------------------
53 newtype Params = Params { id :: Int }
54 deriving (Generic, Show)
55 instance FromJSON Params where
56 parseJSON = genericParseJSON defaultOptions
57 instance ToJSON Params where
58 toJSON = genericToJSON defaultOptions
59 instance ToSchema Params
60 ------------------------------------------------------------------------
61 api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
62 api uId nId =
63 serveJobsAPI DocumentFromWriteNodeJob $ \p log'' ->
64 let
65 log' x = do
66 liftBase $ log'' x
67 in documentsFromWriteNodes uId nId p (liftBase . log')
68
69 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
70 => UserId
71 -> NodeId
72 -> Params
73 -> (JobLog -> m ())
74 -> m JobLog
75 documentsFromWriteNodes uId nId _p logStatus = do
76 let jobLog = JobLog { _scst_succeeded = Just 1
77 , _scst_failed = Just 0
78 , _scst_remaining = Just 1
79 , _scst_events = Just []
80 }
81 logStatus jobLog
82
83 mcId <- getClosestParentIdByType' nId NodeCorpus
84 cId <- case mcId of
85 Just cId -> pure cId
86 Nothing -> do
87 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
88 logStatus $ jobLogFailTotalWithMessage msg jobLog
89 panic msg
90
91 frameWriteIds <- getChildrenByType nId NodeFrameWrite
92
93 -- https://write.frame.gargantext.org/<frame_id>/download
94 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
95
96 frameWritesWithContents <- liftBase $
97 mapM (\node -> do
98 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
99 pure (node, contents)
100 ) frameWrites
101
102 let parsedE = map (\(node, contents) -> hyperdataDocumentFromFrameWrite 7 (node, contents)) frameWritesWithContents
103 -- TODO hard coded param should be taken from user
104 let parsed = List.concat $ rights parsedE
105
106 _ <- flowDataText (RootId (NodeId uId))
107 (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
108 (Multi EN) cId Nothing logStatus
109
110 pure $ jobLogSuccess jobLog
111
112 ------------------------------------------------------------------------
113 hyperdataDocumentFromFrameWrite :: Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
114 hyperdataDocumentFromFrameWrite paragraphSize (node, contents) =
115 case parseLines contents of
116 Left _ -> Left "Error parsing node"
117 Right (Parsed { authors, contents = ctxts, date }) ->
118 let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
119 authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
120 authors' = T.concat $ authorJoinSingle <$> authors
121 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
122 , T.pack $ show month, "-"
123 , T.pack $ show day ]) <$> date
124 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
125 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
126 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date in
127 Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
128 , _hd_doi = Nothing
129 , _hd_url = Nothing
130 , _hd_uniqId = Nothing
131 , _hd_uniqIdBdd = Nothing
132 , _hd_page = Nothing
133 , _hd_title = Just t
134 , _hd_authors = Just authors'
135 , _hd_institutes = Nothing
136 , _hd_source = Just $ node ^. node_name
137 , _hd_abstract = Just ctxt
138 , _hd_publication_date = date'
139 , _hd_publication_year = Just year'
140 , _hd_publication_month = Just month'
141 , _hd_publication_day = Just day'
142 , _hd_publication_hour = Nothing
143 , _hd_publication_minute = Nothing
144 , _hd_publication_second = Nothing
145 , _hd_language_iso2 = Just $ T.pack $ show EN }
146 ) (text2titleParagraphs paragraphSize ctxts)
147 )