Haskell Problem 1: Unix grep (part 3)
May 24, 2009
See the problem statement in a previous post.
A Complete Solution
This solution adds support for recursive searches, the -l and -L flags, and regular expressions.
Regular expressions were difficult. This blog post explains how to use them, but I had difficulty compiling the code. The command that was good enough previously, namely
ghc grep.hs
resulted in linker errors when I included Test.Regex.Posix. The fix was to rename my file Main.hs and compile it with
ghc --make Main.hs
module Main where
import System
import Data.List
import IO
import Char
import Directory
import Text.Regex.Posix
getOptions [] = []
getOptions (x:xs) | x == "--" = []
| x /= "--" = if length x > 1 && x !! 0 == '-'
then (tail x) ++ getOptions xs
else getOptions xs
recursiveDir opts = elem 'r' opts
outputNonMatch opts = ( elem 'v' opts && not (elem 'l' opts) ) || elem 'L' opts
outputPerFile opts = elem 'l' opts || elem 'L' opts
matchCaseInsensitive opts = elem 'i' opts
matchRegularExpression opts = elem 'E' opts
matchExact opts = not (matchCaseInsensitive opts) && not (matchRegularExpression opts)
showLine opts = not (outputPerFile opts)
showOffset opts = not (outputPerFile opts) && elem 'b' opts
showLineNumber opts = not (outputPerFile opts) && elem 'n' opts
showFile opts = elem 'H' opts || elem 'r' opts
getNonOptionArguments [] = []
getNonOptionArguments (x:xs) | x == "--" = xs
| x/= "--" = if x == "-" || x !! 0 /= '-'
then x : getNonOptionArguments xs
else getNonOptionArguments xs
getGrepPattern opts firstArg = if matchCaseInsensitive opts
then map toLower firstArg
else firstArg
data Location a b c = Location { file :: String, lineNumber :: Int, offset :: Int }
incLocation loc l = Location (file loc ) (lineNumber loc + 1) (offset loc + length l + 1)
showResult loc opts pattern l = do
if showFile opts
then do putStr (file loc)
putChar ':'
else return ()
if showLineNumber opts
then do putStr (show (lineNumber loc + 1))
putChar ':'
else return ()
if showOffset opts
then do putStr (show (offset loc))
putChar ':'
else return ()
putStrLn l
doesLineMatch opts pattern l | matchRegularExpression opts = l =~ pattern :: Bool
| matchCaseInsensitive opts = isInfixOf pattern (map toLower l)
| True = isInfixOf pattern l
grepHandlePerFile loc opts pattern h = do
eof <- hIsEOF h
if eof
then
if outputNonMatch opts
then putStrLn (file loc)
else return ()
else do
l <- hGetLine h
if doesLineMatch opts pattern l
then
if outputNonMatch opts
then return ()
else putStrLn (file loc)
else
grepHandlePerFile (incLocation loc l) opts pattern h
grepHandlePerLine loc opts pattern h = do
eof <- hIsEOF h
if eof
then return ()
else do
l <- hGetLine h
if ( doesLineMatch opts pattern l ) == ( outputNonMatch opts )
then return ()
else showResult loc opts pattern l
grepHandle (incLocation loc l) opts pattern h
grepHandle loc opts pattern h = do
if outputPerFile opts
then grepHandlePerFile loc opts pattern h
else grepHandlePerLine loc opts pattern h
grepFile opts pattern file = do
if file == "-"
then grepHandle (Location file 0 0) opts pattern stdin
else do
d <- doesDirectoryExist file
if d
then
if recursiveDir opts
then do
dc <- getDirectoryContents file
grepFiles opts pattern (map ( (file ++ "/") ++ ) (filter (\x -> x !! 0 /= '.' ) dc))
else return ()
else do
h <- openFile file ReadMode `catch` (\e -> do hPutStrLn stderr (show e); return (stderr))
if h == stderr
then return ()
else grepHandle (Location file 0 0) opts pattern h
grepFiles opts pattern files = do
if length files == 0
then return ()
else do
grepFile opts pattern (head files)
grepFiles opts pattern (tail files)
main = do
usage <- return "Usage: grep [OPTION]... PATTERN [FILE]..."
a <- getArgs
userOpts <- return ( getOptions a )
args <- return ( getNonOptionArguments a )
badOpts <- return ( userOpts \\ "bEhHilLnrv" )
opts <- if length args > 2 && not ( elem 'h' userOpts )
then return ('H':userOpts)
else return (userOpts)
if length badOpts > 0
then do putStr "grep: invalid option -- "
putChar (badOpts !! 0)
putStrLn ""
putStrLn usage
else if length args == 0
then putStrLn usage
else do pattern <- return ( getGrepPattern opts (head args) )
if length args == 1
then grepHandle (Location "(standard input)" 0 0) opts pattern stdin
else grepFiles opts pattern (tail args)
May 25, 2009 at 8:32 am
[...] Problem 1: Unix grep (part 4) Filed under: Uncategorized — clarkgrubb @ 8:20 am The previous solution is a lot slower than the system provided [...]