See the problem statement in a previous post.

A Complete Solution

This solution adds support for recursive searches, the -l and -L flags, and regular expressions.

Regular expressions were difficult. This blog post explains how to use them, but I had difficulty compiling the code. The command that was good enough previously, namely

ghc grep.hs

resulted in linker errors when I included Test.Regex.Posix. The fix was to rename my file Main.hs and compile it with

ghc --make Main.hs

module Main where
import System
import Data.List
import IO
import Char
import Directory
import Text.Regex.Posix

getOptions [] = []
getOptions (x:xs) | x == "--" = []
                  | x /= "--" = if length x > 1 && x !! 0 == '-'
                                  then (tail x) ++ getOptions xs
                                  else getOptions xs 

recursiveDir opts = elem 'r' opts
outputNonMatch opts = ( elem 'v' opts && not (elem 'l' opts) ) || elem 'L' opts
outputPerFile opts = elem 'l' opts || elem 'L' opts
matchCaseInsensitive opts = elem 'i' opts
matchRegularExpression opts = elem 'E' opts
matchExact opts = not (matchCaseInsensitive opts) && not (matchRegularExpression opts)
showLine opts = not (outputPerFile opts)
showOffset opts = not (outputPerFile opts) && elem 'b' opts
showLineNumber opts = not (outputPerFile opts) && elem 'n' opts
showFile opts = elem 'H' opts || elem 'r' opts

getNonOptionArguments [] = []
getNonOptionArguments (x:xs) | x == "--" = xs
	                     | x/= "--" = if x == "-" || x !! 0 /= '-'
                                            then x : getNonOptionArguments xs
                                            else getNonOptionArguments xs

getGrepPattern opts firstArg = if matchCaseInsensitive opts
                                 then map toLower firstArg
                                 else firstArg

data Location a b c = Location { file :: String, lineNumber :: Int, offset :: Int }
incLocation loc l = Location (file loc ) (lineNumber loc + 1) (offset loc + length l + 1)

showResult loc opts pattern l = do
  if showFile opts
    then do putStr (file loc)
            putChar ':'
    else return ()
  if showLineNumber opts
    then do putStr (show (lineNumber loc + 1))
            putChar ':'
    else return ()
  if showOffset opts
    then do putStr (show (offset loc))
            putChar ':'
    else return ()
  putStrLn l

doesLineMatch opts pattern l | matchRegularExpression opts = l =~ pattern :: Bool
                             | matchCaseInsensitive opts   = isInfixOf pattern (map toLower l)
                             | True                        = isInfixOf pattern l

grepHandlePerFile loc opts pattern h = do
  eof <- hIsEOF h
  if eof
    then
      if outputNonMatch opts
        then putStrLn (file loc)
        else return ()
    else do
      l <- hGetLine h
      if doesLineMatch opts pattern l
        then
          if outputNonMatch opts
            then return ()
            else putStrLn (file loc)
        else
          grepHandlePerFile (incLocation loc l) opts pattern h

grepHandlePerLine loc opts pattern h = do
  eof <- hIsEOF h
  if eof
    then return ()
    else do
      l <- hGetLine h
      if ( doesLineMatch opts pattern l ) == ( outputNonMatch opts )
        then return ()
        else showResult loc opts pattern l
      grepHandle (incLocation loc l) opts pattern h

grepHandle loc opts pattern h = do
  if outputPerFile opts
    then grepHandlePerFile loc opts pattern h
    else grepHandlePerLine loc opts pattern h

grepFile opts pattern file = do
  if file == "-"
    then grepHandle (Location file 0 0) opts pattern stdin
    else do
      d <- doesDirectoryExist file
      if d
        then
          if recursiveDir opts
            then do
              dc <- getDirectoryContents file
              grepFiles opts pattern (map ( (file ++ "/") ++ ) (filter (\x -> x !! 0 /= '.' ) dc))
            else return ()
        else do
          h  <- openFile file ReadMode `catch` (\e -> do hPutStrLn stderr (show e); return (stderr))
          if h == stderr
            then return ()
            else grepHandle (Location file 0 0) opts pattern h

grepFiles opts pattern files = do
  if length files == 0
    then return ()
    else do
      grepFile opts pattern (head files)
      grepFiles opts pattern (tail files)

main = do
  usage <- return "Usage: grep [OPTION]... PATTERN [FILE]..."
  a <- getArgs
  userOpts <- return ( getOptions a )
  args <- return ( getNonOptionArguments a )
  badOpts <- return ( userOpts \\ "bEhHilLnrv" )
  opts <- if length args > 2 && not ( elem 'h' userOpts )
            then return ('H':userOpts)
            else return (userOpts)
  if length badOpts > 0
    then do putStr "grep: invalid option -- "
            putChar (badOpts !! 0)
	    putStrLn ""
            putStrLn usage
    else if length args == 0
           then putStrLn usage
           else do pattern <- return ( getGrepPattern opts (head args) )
                   if length args == 1
                     then grepHandle (Location "(standard input)" 0 0) opts pattern stdin
                     else grepFiles opts pattern (tail args)

One Response to “Haskell Problem 1: Unix grep (part 3)”


  1. [...] Problem 1: Unix grep (part 4) Filed under: Uncategorized — clarkgrubb @ 8:20 am The previous solution is a lot slower than the system provided [...]


Leave a Reply