]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
Fix NGrams pagination (purescript-gargantext#531)
[gargantext.git] / src / Gargantext / API / Node / DocumentsFromWriteNodes.hs
1 {-|
2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
15
16 module Gargantext.API.Node.DocumentsFromWriteNodes
17 where
18
19 -- import Data.Maybe (fromMaybe)
20 import Conduit
21 import Control.Lens ((^.))
22 import Data.Aeson
23 import Data.Either (Either(..), rights)
24 import Data.Maybe (fromMaybe)
25 import Data.Swagger
26 import Data.Text (Text)
27 import GHC.Generics (Generic)
28 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
29 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
30 import Gargantext.API.Admin.Types (HasSettings)
31 import Gargantext.API.Prelude (GargM, GargError)
32 import Gargantext.Core (Lang(..))
33 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
34 import Gargantext.Core.Text.List.Social (FlowSocialListWith)
35 import Gargantext.Core.Text.Terms (TermType(..))
36 import Gargantext.Core.Types.Individu (User(..))
37 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
38 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
39 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
40 import Gargantext.Database.Admin.Types.Hyperdata.Frame
41 import Gargantext.Database.Admin.Types.Node
42 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
43 import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
44 import Gargantext.Prelude
45 import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
46 import Gargantext.Core.Text.Corpus.Parsers.Date (split')
47 import Servant
48 import Text.Read (readMaybe)
49 import qualified Data.List as List
50 import qualified Data.Text as T
51 -- import qualified Gargantext.Defaults as Defaults
52
53 ------------------------------------------------------------------------
54 type API = Summary " Documents from Write nodes."
55 :> AsyncJobs JobLog '[JSON] Params JobLog
56 ------------------------------------------------------------------------
57 data Params = Params
58 { id :: Int
59 , paragraphs :: Text
60 , lang :: Lang
61 , selection :: FlowSocialListWith
62 }
63 deriving (Generic, Show)
64 instance FromJSON Params where
65 parseJSON = genericParseJSON defaultOptions
66 instance ToJSON Params where
67 toJSON = genericToJSON defaultOptions
68 instance ToSchema Params
69 ------------------------------------------------------------------------
70 api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
71 api uId nId =
72 serveJobsAPI DocumentFromWriteNodeJob $ \jHandle p ->
73 documentsFromWriteNodes uId nId p jHandle
74
75 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m, MonadJobStatus m)
76 => UserId
77 -> NodeId
78 -> Params
79 -> JobHandle m
80 -> m ()
81 documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } jobHandle = do
82 markStarted 2 jobHandle
83 markProgress 1 jobHandle
84
85 mcId <- getClosestParentIdByType' nId NodeCorpus
86 cId <- case mcId of
87 Just cId -> pure cId
88 Nothing -> do
89 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
90 markFailed (Just msg) jobHandle
91 panic msg
92
93 frameWriteIds <- getChildrenByType nId NodeFrameWrite
94
95 -- https://write.frame.gargantext.org/<frame_id>/download
96 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
97
98 frameWritesWithContents <- liftBase $
99 mapM (\node -> do
100 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
101 pure (node, contents)
102 ) frameWrites
103
104 let paragraphs' = fromMaybe (7 :: Int) $ (readMaybe $ T.unpack paragraphs)
105 let parsedE = (\(node, contents)
106 -> hyperdataDocumentFromFrameWrite lang paragraphs' (node, contents)) <$> frameWritesWithContents
107 let parsed = List.concat $ rights parsedE
108 -- printDebug "DocumentsFromWriteNodes: uId" uId
109 _ <- flowDataText (RootId (NodeId uId))
110 (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
111 (Multi lang)
112 cId
113 (Just selection)
114 jobHandle
115
116 markProgress 1 jobHandle
117
118 ------------------------------------------------------------------------
119 hyperdataDocumentFromFrameWrite :: Lang -> Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
120 hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
121 case parseLines contents of
122 Left _ -> Left "Error parsing node"
123 Right (Parsed { authors, contents = ctxts}) ->
124 let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
125 authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
126 authors' = T.concat $ authorJoinSingle <$> authors
127
128 --{-
129 (year',month',day') = split' (node^. node_date)
130 date' = Just $ T.concat [ T.pack $ show year', "-"
131 , T.pack $ show month', "-"
132 , T.pack $ show day'
133 ]
134 --}
135
136 {-
137 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year', "-"
138 , T.pack $ show month', "-"
139 , T.pack $ show day' ]) <$> date
140 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
141 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
142 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date
143 --}
144 in
145 Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
146 , _hd_doi = Nothing
147 , _hd_url = Nothing
148 , _hd_uniqId = Nothing
149 , _hd_uniqIdBdd = Nothing
150 , _hd_page = Nothing
151 , _hd_title = Just t
152 , _hd_authors = Just authors'
153 , _hd_institutes = Nothing
154 , _hd_source = Just $ node ^. node_name
155 , _hd_abstract = Just ctxt
156 , _hd_publication_date = date'
157 , _hd_publication_year = Just year'
158 , _hd_publication_month = Just month'
159 , _hd_publication_day = Just day'
160 , _hd_publication_hour = Nothing
161 , _hd_publication_minute = Nothing
162 , _hd_publication_second = Nothing
163 , _hd_language_iso2 = Just $ T.pack $ show lang }
164 ) (text2titleParagraphs paragraphSize ctxts)
165 )