]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Parsers/Isidore.hs
Merge branch 'dev-ngrams-repo' of ssh://delanoe.org/haskell-gargantext into dev-ngram...
[gargantext.git] / src / Gargantext / Text / Parsers / Isidore.hs
1 {-|
2 Module : Gargantext.Text.Parsers.Isidore
3 Description : To query French Humanities publication database
4 Copyright : (c) CNRS, 2019-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 TODO:
11 - put endpoint in configuration file
12 - more flexible fields of research
13 - type database name
14 - use more ontologies to help building corpora
15 -}
16
17
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE ScopedTypeVariables #-}
20 {-# LANGUAGE OverloadedStrings #-}
21
22 module Gargantext.Text.Parsers.Isidore where
23
24 import Control.Lens hiding (contains)
25 import Data.ByteString.Lazy (ByteString)
26 import Data.RDF hiding (triple, Query)
27 import Data.Text hiding (groupBy, map)
28 import Database.HSparql.Connection
29 import Database.HSparql.QueryGenerator
30 import Gargantext.Database.Types.Node (HyperdataDocument(..))
31 import Gargantext.Prelude
32 import Gargantext.Core (Lang)
33 import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody)
34 import Prelude (String)
35
36 route :: EndPoint
37 route = "https://isidore.science/sparql/"
38
39 selectQueryRaw' :: String -> String -> IO (Response ByteString)
40 selectQueryRaw' uri q = getWith opts uri
41 where
42 opts = defaults & header "Accept" .~ ["application/sparql-results+xml"]
43 & header "User-Agent" .~ ["gargantext-hsparql-client"]
44 & param "query" .~ [Data.Text.pack q]
45
46 isidoreGet :: Lang -> Text -> IO (Maybe [HyperdataDocument])
47 isidoreGet l q = do
48 bindingValues <- isidoreGet' q
49 case bindingValues of
50 Nothing -> pure Nothing
51 Just dv -> pure $ Just $ map (bind2doc l) dv
52
53 isidoreGet' :: Text -> IO (Maybe [[BindingValue]])
54 isidoreGet' q = do
55 let s = createSelectQuery $ isidoreSelect q
56 putStrLn s
57 r <- selectQueryRaw' route s
58 putStrLn $ show $ r ^. responseStatus
59 pure $ structureContent $ r ^. responseBody
60 -- res <- selectQuery route $ simpleSelect q
61 -- pure res
62
63 isidoreSelect :: Text -> Query SelectQuery
64 isidoreSelect q = do
65 -- See Predefined Namespace Prefixes:
66 -- https://isidore.science/sparql?nsdecl
67 isidore <- prefix "isidore" (iriRef "http://www.rechercheisidore.fr/class/")
68 rdf <- prefix "rdf" (iriRef "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
69 dcterms <- prefix "dcterms" (iriRef "http://purl.org/dc/terms/")
70 dc <- prefix "dc" (iriRef "http://purl.org/dc/elements/1.1/")
71 --iso <- prefix "fra" (iriRef "http://lexvo.org/id/iso639-3/")
72 --ore <- prefix "ore" (iriRef "http://www.openarchives.org/ore/terms/")
73 --bif <- prefix "bif" (iriRef "bif:")
74
75 link <- var
76 title <- var
77 date <- var
78 abstract <- var
79 authors <- var
80 source <- var
81 langDoc <- var
82 publisher <- var
83 --langFr <- var
84 --agg <- var
85
86 triple_ link (rdf .:. "type") (isidore .:. "BibliographicalResource")
87 triple_ link (dcterms .:. "title") title
88 triple_ link (dcterms .:. "date") date
89 triple_ link (dcterms .:. "creator") authors
90 triple_ link (dcterms .:. "language") langDoc
91 triple_ link (dc .:. "description") abstract
92 --triple_ link (ore .:. "isAggregatedBy") agg
93 --triple_ agg (dcterms .:. "title") title
94
95 optional_ $ triple_ link (dcterms .:. "source") source
96 optional_ $ triple_ link (dcterms .:. "publisher") publisher
97
98 -- TODO FIX BUG with (.||.) operator
99 --filterExpr $ (.||.) (contains title q) (contains title q)
100 filterExpr_ (containsWith title q) -- (contains abstract q)
101 --filterExpr (containsWith abstract q) -- (contains abstract q)
102
103 -- TODO FIX filter with lang
104 --filterExpr $ langMatches title (str ("fra" :: Text))
105 --filterExpr $ (.==.) lang (str ("http://lexvo.org/id/iso639-3/fra" :: Text))
106
107 orderNextDesc date
108 limit_ 10
109 distinct_
110 selectVars [link, date, langDoc, authors, source, publisher, title, abstract]
111
112 -- | TODO : check if all cases are taken into account
113 unbound :: Lang -> BindingValue -> Maybe Text
114 unbound _ Unbound = Nothing
115 unbound _ (Bound (UNode x)) = Just x
116 unbound _ (Bound (LNode (TypedL x _))) = Just x
117 unbound _ (Bound (LNode (PlainL x))) = Just x
118 unbound l (Bound (LNode (PlainLL x l'))) = if l' == (toLower $ cs $ show l) then Just x else Nothing
119 unbound _ _ = Nothing
120
121 bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
122 bind2doc l [ link, date, langDoc, authors, _source, publisher, title, abstract ] =
123 HyperdataDocument (Just "Isidore")
124 Nothing
125 (unbound l link)
126 Nothing Nothing Nothing
127 (unbound l title)
128 (unbound l authors)
129 Nothing
130 (unbound l publisher)
131 (unbound l abstract)
132 (unbound l date)
133 Nothing Nothing Nothing Nothing Nothing Nothing
134 (unbound l langDoc)
135
136 bind2doc _ _ = undefined