1 module Language.Porter (stem, fixstem)
9 | c `elem` "aeiou" = False
10 | c == 'y' = i == 0 || isVowel str (i - 1)
15 isVowel = (not .) . isConsonant
17 byIndex fun str = fun str [0..length str - 1]
19 measure = length . filter not . init . (True:) . map head . group . byIndex (map . isConsonant)
21 containsVowel = byIndex (any . isVowel)
23 endsWithDouble = startsWithDouble . reverse
25 startsWithDouble l | length l < 2 = False
26 | otherwise = let (x:y:_) = l in x == y && x `notElem` "aeiou"
28 cvc word | length word < 3 = False
29 | otherwise = isConsonant word lastIndex &&
30 isVowel word (lastIndex - 1) &&
31 isConsonant word (lastIndex - 2) &&
32 last word `notElem` "wxy"
33 where lastIndex = length word - 1
35 statefulReplace predicate str end replacement
36 | end `isSuffixOf` str = Just replaced
39 part = take (length str - length end) str
40 replaced | predicate part = Right (part ++ replacement)
41 | otherwise = Left str
43 replaceEnd predicate str end replacement = do
44 result <- statefulReplace predicate str end replacement
45 return (either id id result)
47 findStem f word pairs = msum $ map (uncurry (replaceEnd f word)) pairs
49 measureGT = flip ((>) . measure)
51 step1a word = fromMaybe word result
52 where result = findStem (const True) word [("sses", "ss"), ("ies", "i"), ("ss", "ss"), ("s", "")]
54 beforeStep1b word = fromMaybe (Left word) result
56 cond23 x = do { v <- x; either (const Nothing) (return . Right) v }
57 cond1 x = do { v <- x; return (Left v) }
59 cond1 (replaceEnd (measureGT 0) word "eed" "ee") `mplus`
60 cond23 (statefulReplace containsVowel word "ed" "" ) `mplus`
61 cond23 (statefulReplace containsVowel word "ing" "" )
63 afterStep1b word = fromMaybe word result
65 double = endsWithDouble word && not (any ((`isSuffixOf` word) . return) "lsz")
66 mEq1AndCvc = measure word == 1 && cvc word
67 iif cond val = if cond then Just val else Nothing
68 result = findStem (const True) word [("at", "ate"), ("bl", "ble"), ("iz", "ize")]
69 `mplus` iif double (init word)
70 `mplus` iif mEq1AndCvc (word ++ "e")
72 step1b = either id afterStep1b . beforeStep1b
74 step1c word = fromMaybe word result
75 where result = replaceEnd containsVowel word "y" "i"
77 step1 = step1c . step1b . step1a
79 step2 word = fromMaybe word result
81 result = findStem (measureGT 0) word
104 step3 word = fromMaybe word result
106 result = findStem (measureGT 0) word
115 step4 word = fromMaybe word result
117 gt1andST str = (measureGT 1) str && any ((`isSuffixOf` str) . return) "st"
118 findGT1 = findStem (measureGT 1) word . map (flip (,) "")
119 result = (findGT1 ["al", "ance", "ence", "er", "ic", "able", "ible", "ant", "ement", "ment", "ent"]) `mplus`
120 (findStem gt1andST word [("ion","")]) `mplus`
121 (findGT1 ["ou", "ism", "ate", "iti", "ous", "ive", "ize"])
123 step5a word = fromMaybe word result
125 test str = (measureGT 1 str) || ((measure str == 1) && (not $ cvc str))
126 result = replaceEnd test word "e" ""
128 step5b word = fromMaybe word result
130 cond s = last s == 'l' && measureGT 1 s
131 result = replaceEnd cond word "l" ""
133 step5 = step5b . step5a
135 allSteps = step5 . step4 . step3 . step2 . step1
137 stem s | length s < 3 = s
138 | otherwise = allSteps s
140 fixpoint f x = let fx = f x in
145 fixstem = fixpoint stem
151 content <- readFile "input.txt"
152 writeFile "output.txt" $ unlines $ map stem $ lines content