]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
Parse and convert text and title of wikipedia dump, #4
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
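{-|
Module      : Gargantext.Database.TextSearch
Description : Full-text search over nodes using PostgreSQL tsquery
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Builds a PostgreSQL text-search query from a list of terms (each term is
passed through @plainto_tsquery@ and the results are combined with @&&@)
and runs it against the @title_abstract@ column of the @nodes@ table,
restricted to the children of a given parent node.
-}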
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Database.TextSearch where
18
import Prelude (print)

import Control.Exception (bracket)
import Control.Monad

import Data.Aeson
import Data.List (intersperse)
import Data.String (IsString(..))
import Data.Text (Text, words)

import Database.PostgreSQL.Simple
import Database.PostgreSQL.Simple.ToField

import Gargantext.Database.Utils (connectGargandb)
import Gargantext.Prelude
33
-- | A text-search query, stored as the list of its search terms.
--
-- When rendered as a SQL parameter every term must match (the terms are
-- combined with @&&@).
newtype TSQuery = UnsafeTSQuery [Text]

-- | Wrap a list of search terms into a 'TSQuery', keeping them unchanged.
toTSQuery :: [Text] -> TSQuery
toTSQuery = UnsafeTSQuery
38
-- | Lets a string literal be used as a 'TSQuery': the literal is converted
-- with 'cs', split with 'words', and each resulting word becomes one term.
instance IsString TSQuery
  where
    fromString str = UnsafeTSQuery (words (cs str))
42
43
-- | Renders a 'TSQuery' as a SQL expression of the form
-- @plainto_tsquery('t1') && plainto_tsquery('t2') && ...@.
--
-- Each term is passed through 'Escape', so it is quoted and escaped by
-- postgresql-simple rather than spliced into the SQL verbatim.
instance ToField TSQuery
  where
    -- With no terms the generic rendering below would produce an empty
    -- fragment, and the surrounding @(?::tsquery)@ would become the invalid
    -- SQL @(::tsquery)@. Emit an explicit empty tsquery instead.
    toField (UnsafeTSQuery []) = Plain "plainto_tsquery('')"
    toField (UnsafeTSQuery xs) =
      Many $ intersperse (Plain " && ") $ map term xs
      where
        -- One @plainto_tsquery(<escaped term>)@ call per search term.
        term q = Many [ Plain "plainto_tsquery("
                      , Escape (cs q)
                      , Plain ")"
                      ]
53
-- | Identifier of the parent node; bound to @n.parent_id@ in the search query.
type ParentId = Int

-- | Value bound to the @limit@ clause of the search query.
type Limit    = Int

-- | Value bound to the @offset@ clause of the search query.
type Offset   = Int
-- | Sort direction used in the @ORDER BY@ clause of the search query.
data Order = Asc | Desc

-- | Renders the sort direction as the bare SQL keyword (@ASC@ or @DESC@).
-- 'Plain' is used so the keyword is inserted without quoting.
instance ToField Order
  where
    toField ord = Plain $ case ord of
      Asc  -> "ASC"
      Desc -> "DESC"
63
-- TODO
--   * FIX fav
--   * ADD ngrams count
--   * TESTS

-- | SQL statement used by the text search.
--
-- Selects, for every matching row of @nodes@, its id and the
-- @publication_year@, @title@, @source@ and @authors@ entries of its
-- @hyperdata@, plus the @score@ of the left-joined @nodes_nodes@ row.
--
-- Placeholders, in order:
--
--   1. the text-search query matched against @n.title_abstract@
--   2. the parent node id
--   3. the sort direction applied to @hyperdata -> 'publication_date'@
--   4. the offset
--   5. the limit
textSearchQuery :: Query
textSearchQuery =
  "SELECT n.id, n.hyperdata->'publication_year' \
  \ , n.hyperdata->'title' \
  \ , n.hyperdata->'source' \
  \ , n.hyperdata->'authors' \
  \ , COALESCE(nn.score,null) \
  \ FROM nodes n \
  \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
  \ WHERE \
  \ n.title_abstract @@ (?::tsquery) \
  \ AND n.parent_id = ? AND n.typename = 40 \
  \ ORDER BY n.hyperdata -> 'publication_date' ? \
  \ offset ? limit ?;"
81
82
-- | Run 'textSearchQuery' on the given connection.
--
-- Each result row is
-- @(id, publication_year, title, source, authors, score)@, in the order of
-- the columns selected by 'textSearchQuery'.
textSearch :: Connection
           -> TSQuery -> ParentId
           -> Limit -> Offset -> Order
           -> IO [(Int, Value, Value, Value, Value, Maybe Int)]
textSearch conn tsq parentId lim off ord =
  -- The tuple follows the placeholder order of the SQL text, which puts the
  -- offset before the limit (the reverse of this function's argument order).
  query conn textSearchQuery (tsq, parentId, ord, off, lim)
88
-- | Manual check of 'textSearch': connects using the settings in
-- @gargantext.ini@, runs the search under the given parent with limit 5,
-- offset 0 and ascending order, and prints every result row.
--
-- The connection is acquired with 'bracket' so it is closed once the rows
-- have been printed, and also when the query or the printing throws.
textSearchTest :: ParentId -> TSQuery -> IO ()
textSearchTest pId q =
  bracket (connectGargandb "gargantext.ini") close $ \conn ->
    textSearch conn q pId 5 0 Asc >>= mapM_ print