]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
Merge remote-tracking branch 'origin/475-dev-node-team-invite' into dev-merge
[gargantext.git] / src / Gargantext / API / Node / DocumentsFromWriteNodes.hs
1 {-|
2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
15
16 module Gargantext.API.Node.DocumentsFromWriteNodes
17 where
18
19 -- import Data.Maybe (fromMaybe)
20 import Conduit
21 import Control.Lens ((^.))
22 import Data.Aeson
23 import Data.Either (Either(..), rights)
24 import Data.Swagger
25 import GHC.Generics (Generic)
26 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
27 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
28 import Gargantext.API.Admin.Types (HasSettings)
29 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
30 import Gargantext.API.Prelude (GargM, GargError)
31 import Gargantext.Core (Lang(..))
32 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
33 import Gargantext.Core.Text.List.Social (FlowSocialListWith)
34 import Gargantext.Core.Text.Terms (TermType(..))
35 import Gargantext.Core.Types.Individu (User(..))
36 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
37 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
38 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
39 import Gargantext.Database.Admin.Types.Hyperdata.Frame
40 import Gargantext.Database.Admin.Types.Node
41 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
42 import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
43 import Gargantext.Prelude
44 import Gargantext.Utils.Jobs (serveJobsAPI)
45 import Gargantext.Core.Text.Corpus.Parsers.Date (split')
46 import Servant
47 import qualified Data.List as List
48 import qualified Data.Text as T
49 -- import qualified Gargantext.Defaults as Defaults
50
51 ------------------------------------------------------------------------
52 type API = Summary " Documents from Write nodes."
53 :> AsyncJobs JobLog '[JSON] Params JobLog
54 ------------------------------------------------------------------------
55 data Params = Params
56 { id :: Int
57 , paragraphs :: Int
58 , lang :: Lang
59 , selection :: FlowSocialListWith
60 }
61 deriving (Generic, Show)
62 instance FromJSON Params where
63 parseJSON = genericParseJSON defaultOptions
64 instance ToJSON Params where
65 toJSON = genericToJSON defaultOptions
66 instance ToSchema Params
67 ------------------------------------------------------------------------
68 api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
69 api uId nId =
70 serveJobsAPI DocumentFromWriteNodeJob $ \p log'' ->
71 let
72 log' x = do
73 liftBase $ log'' x
74 in documentsFromWriteNodes uId nId p (liftBase . log')
75
76 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
77 => UserId
78 -> NodeId
79 -> Params
80 -> (JobLog -> m ())
81 -> m JobLog
82 documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } logStatus = do
83 let jobLog = JobLog { _scst_succeeded = Just 1
84 , _scst_failed = Just 0
85 , _scst_remaining = Just 1
86 , _scst_events = Just []
87 }
88 logStatus jobLog
89
90 mcId <- getClosestParentIdByType' nId NodeCorpus
91 cId <- case mcId of
92 Just cId -> pure cId
93 Nothing -> do
94 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
95 logStatus $ jobLogFailTotalWithMessage msg jobLog
96 panic msg
97
98 frameWriteIds <- getChildrenByType nId NodeFrameWrite
99
100 -- https://write.frame.gargantext.org/<frame_id>/download
101 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
102
103 frameWritesWithContents <- liftBase $
104 mapM (\node -> do
105 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
106 pure (node, contents)
107 ) frameWrites
108
109 let parsedE = (\(node, contents)
110 -> hyperdataDocumentFromFrameWrite lang paragraphs (node, contents)) <$> frameWritesWithContents
111 let parsed = List.concat $ rights parsedE
112 printDebug "DocumentsFromWriteNodes: uId" uId
113 _ <- flowDataText (RootId (NodeId uId))
114 (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
115 (Multi lang)
116 cId
117 (Just selection)
118 logStatus
119
120 pure $ jobLogSuccess jobLog
121
122 ------------------------------------------------------------------------
123 hyperdataDocumentFromFrameWrite :: Lang -> Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
124 hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
125 case parseLines contents of
126 Left _ -> Left "Error parsing node"
127 Right (Parsed { authors, contents = ctxts}) ->
128 let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
129 authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
130 authors' = T.concat $ authorJoinSingle <$> authors
131
132 --{-
133 (year',month',day') = split' (node^. node_date)
134 date' = Just $ T.concat [ T.pack $ show year', "-"
135 , T.pack $ show month', "-"
136 , T.pack $ show day'
137 ]
138 --}
139
140 {-
141 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year', "-"
142 , T.pack $ show month', "-"
143 , T.pack $ show day' ]) <$> date
144 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
145 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
146 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date
147 --}
148 in
149 Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
150 , _hd_doi = Nothing
151 , _hd_url = Nothing
152 , _hd_uniqId = Nothing
153 , _hd_uniqIdBdd = Nothing
154 , _hd_page = Nothing
155 , _hd_title = Just t
156 , _hd_authors = Just authors'
157 , _hd_institutes = Nothing
158 , _hd_source = Just $ node ^. node_name
159 , _hd_abstract = Just ctxt
160 , _hd_publication_date = date'
161 , _hd_publication_year = Just year'
162 , _hd_publication_month = Just month'
163 , _hd_publication_day = Just day'
164 , _hd_publication_hour = Nothing
165 , _hd_publication_minute = Nothing
166 , _hd_publication_second = Nothing
167 , _hd_language_iso2 = Just $ T.pack $ show lang }
168 ) (text2titleParagraphs paragraphSize ctxts)
169 )