#!/usr/bin/env stack > -- stack script --resolver lts-22.6 --package text --package bytestring --package process --package directory --package filepath search.lhs: ryvm-ranked gopher search with progressive feedback =============================================================== A Haskell rewrite of the old search.sh that wraps the ryvm ranking engine. Same backend, same result format, but with reliable initial- line streaming — bash's stdout couldn't be made to flush a single "Searching..." line before ryvm finished (~50 second wait with nothing on the wire); Haskell's `hSetBuffering stdout NoBuffering` plus `hFlush` after each emission guarantees the feedback line lands immediately. ryvm is fundamentally batched — relevance ranking needs every candidate's score before producing a sorted list, so the *results* themselves still arrive in a clump near the end. The improvement over search.sh is that the user now sees "I'm searching" within milliseconds rather than nothing at all. For *truly* streaming search (every match as it's found, in walk order), see grep.lhs. URL design ---------- /applets/search.lhs → prompt page /applets/search.lhs + text → ranked results Running the doctests -------------------- doctest-lhs search.lhs Reuses this file's own `-- stack` directive and language pragmas, so doctest sees the same packages and extensions `run-cached-lhs` compiles against. Imports ------- > {-# LANGUAGE OverloadedStrings #-} > module Main (main) where > > import Control.Exception (SomeException, try) > import qualified Data.ByteString as BS > import qualified Data.Text as T > import qualified Data.Text.Encoding as TE > import qualified Data.Text.Encoding.Error as TEE > import qualified Data.Text.IO as TIO > import System.Environment (getArgs, lookupEnv) > import System.IO (BufferMode (..), Handle, hClose, > hFlush, hGetLine, hIsEOF, > hSetBuffering, hSetEncoding, > stdout, utf8) > import System.Process (CreateProcess (..), StdStream (..), > createProcess, proc, > waitForProcess) Constants --------- > defaultHost, defaultPort :: T.Text > defaultHost = "gopher.someodd.zip" > defaultPort = "70" > searchRoot :: FilePath > searchRoot = "/var/gopher" Request parsing --------------- > data Req = Req > { reqSel :: T.Text > , reqQ :: T.Text > , reqP :: T.Text > } deriving Show > parseArgs :: [String] -> Req > parseArgs (s:q:p:_) = Req (T.pack s) (T.pack q) (T.pack p) > parseArgs (s:q:_) = Req (T.pack s) (T.pack q) T.empty > parseArgs [s] = Req (T.pack s) T.empty T.empty > parseArgs [] = error > "search.lhs: missing argv[0] (gopher selector). Run via Venusia." > data Ctx = Ctx > { ctxScriptSel :: T.Text > , ctxHost :: T.Text > , ctxPort :: T.Text > } Main dispatch ------------- `NoBuffering` + `hFlush` after every emission means each row reaches the wire instantly. Wrapping in `try` ensures any exception (ryvm not found, can't enter searchRoot, etc.) becomes a visible type-3 row instead of a blank response. > main :: IO () > main = do > hSetBuffering stdout NoBuffering > hSetEncoding stdout utf8 > result <- try mainBody :: IO (Either SomeException ()) > case result of > Right () -> pure () > Left e -> mapM_ putLine > [ errorItem ("search.lhs crashed: " <> T.pack (show e)) > , terminator > ] > mainBody :: IO () > mainBody = do > req <- parseArgs <$> getArgs > host <- maybe defaultHost T.pack <$> lookupEnv "GOPHER_HOST" > port <- maybe defaultPort T.pack <$> lookupEnv "GOPHER_PORT" > let pinfo = reqP req > scriptSel = T.dropEnd (T.length pinfo) (reqSel req) > ctx = Ctx scriptSel host port > q = T.strip (reqQ req) > if T.null q > then emitPrompt ctx > else runSearch ctx q Pages ----- > emitPrompt :: Ctx -> IO () > emitPrompt ctx = mapM_ putLine > [ infoLine "Search someodd's gopherhole's text files (ranked by relevance)." > , infoLine "(ryvm-based; result ordering favours matches near each other.)" > , infoLine "" > , searchLine "Query" (ctxScriptSel ctx) (ctxHost ctx) (ctxPort ctx) > , terminator > ] > runSearch :: Ctx -> T.Text -> IO () > runSearch ctx q = do > -- The whole point of porting away from bash: this line MUST land > -- at the client immediately, before ryvm chews through the tree. > -- Haskell's NoBuffering Handle + hFlush make that guaranteed. > putLine (infoLine ("Searching for \"" <> q <> "\" — ranked search, may take a moment...")) > putLine (infoLine "") > > -- Spawn ryvm with CWD = searchRoot so its --make-relative output > -- is relative to the served root (selectors line up with the > -- gopher namespace via the catch-all [[files]] block). > let ryvmArgs = ["--ext-whitelist", "txt", "--make-relative", ".", T.unpack q] > spec = (proc "ryvm" ryvmArgs) > { cwd = Just searchRoot > , std_out = CreatePipe > , std_err = Inherit > } > (_, mOut, _, ph) <- createProcess spec > case mOut of > Just h -> streamRyvmOutput ctx h > Nothing -> pure () > _ <- waitForProcess ph > putLine terminator > -- | Read ryvm's output line by line, formatting each TSV row as a > -- gophermap entry. ryvm tends to batch its writes (block-buffered > -- stdout, default for piped Handles in GHC) so the rows mostly > -- arrive together at the end — but we still process them line-by- > -- line so any progressive output ryvm DOES manage to flush lands > -- at the client as soon as we see it. > streamRyvmOutput :: Ctx -> Handle -> IO () > streamRyvmOutput ctx h = loop > where > loop = do > eof <- hIsEOF h > if eof > then hClose h > else do > line <- hGetLine h > emitMatch ctx (T.pack line) > loop > -- | Format one ryvm TSV row as a gophermap entry. Mirrors the awk > -- in the old search.sh: > -- > -- ryvm output: fileselscoresnippet > -- gopher row: {type}{display}{file}{host}{port} > -- > -- where: > -- * display = "{sel-or-file} — {snippet} [score {score}]" > -- * type = '1' if {file}'s first line looks like a gophermap > -- row (i.e. it IS a menu); '0' otherwise. > -- > -- Malformed lines (fewer than four TSV fields) are silently skipped > -- — better than crashing on a stray ryvm warning. > emitMatch :: Ctx -> T.Text -> IO () > emitMatch ctx line = case T.splitOn "\t" line of > (file : sel0 : score : snip : _) -> do > let sel = if T.null sel0 then file else sel0 > display = sel <> " — " <> snip <> " [score " <> score <> "]" > isMap <- isGophermapFile (T.unpack file) > let t = if isMap then T.singleton '1' else T.singleton '0' > putLine (t <> sanitize display > <> "\t" <> file > <> "\t" <> ctxHost ctx > <> "\t" <> ctxPort ctx) > _ -> pure () Gophermap-detection helper -------------------------- A file is rendered as menu (type 1) if its first line parses as a gophermap row — exactly the rule the awk-based search.sh used. The shape we check for is "two-or-more chars, then four tab-separated non-empty fields". Lenient UTF-8 decode means stray non-UTF-8 bytes in a file's first line don't crash the scan. > -- | True when the first line of the file at @path@ looks like a > -- gophermap row. > -- > -- The check is best-effort: missing files, permission errors, etc. > -- return 'False' (the file just won't be promoted to type 1 in > -- results). Files are read leniently (replacement for non-UTF-8 > -- bytes) so a stray binary doesn't blow up the search. > isGophermapFile :: FilePath -> IO Bool > isGophermapFile path = do > result <- try $ do > bs <- BS.readFile path > let firstLine = BS.takeWhile (/= 10) bs -- 10 is '\n' > decoded = TE.decodeUtf8With TEE.lenientDecode firstLine > stripped = T.dropWhileEnd (== '\r') decoded > pure (looksLikeGophermapRow stripped) > case (result :: Either SomeException Bool) of > Left _ -> pure False > Right b -> pure b > -- | A line "looks like a gophermap row" if it has four tab-separated > -- fields and the first one is at least two characters > -- (type-prefix + display). > -- > -- >>> looksLikeGophermapRow "0README.txt\t/README.txt\thost\t70" > -- True > -- > -- >>> looksLikeGophermapRow "iJust an info line\t\t\t0" > -- True > -- > -- >>> looksLikeGophermapRow "plain text" > -- False > -- > -- >>> looksLikeGophermapRow "0" > -- False > -- > -- >>> looksLikeGophermapRow "" > -- False > looksLikeGophermapRow :: T.Text -> Bool > looksLikeGophermapRow t = case T.splitOn "\t" t of > [first, _, _, _] -> T.length first >= 2 > _ -> False Gophermap line builders + sanitisation -------------------------------------- Same conventions as the other applets. `hFlush` after every line so the NoBuffering Handle definitely pushes bytes through. > putLine :: T.Text -> IO () > putLine t = do > TIO.putStr (t <> "\r\n") > hFlush stdout > -- | >>> infoLine "hello" > -- "ihello\tnull\terror.host\t1" > infoLine :: T.Text -> T.Text > infoLine msg = "i" <> sanitize msg <> "\tnull\terror.host\t1" > -- | >>> searchLine "Search" "/q" "host" "70" > -- "7Search\t/q\thost\t70" > searchLine :: T.Text -> T.Text -> T.Text -> T.Text -> T.Text > searchLine display sel host port = > "7" <> sanitize display <> "\t" <> sel <> "\t" <> host <> "\t" <> port > -- | >>> errorItem "nope" > -- "3nope\tnull\terror.host\t1" > errorItem :: T.Text -> T.Text > errorItem msg = "3" <> sanitize msg <> "\tnull\terror.host\t1" > terminator :: T.Text > terminator = "." > -- | >>> sanitize "ok" > -- "ok" > -- > -- >>> sanitize "with\ttab" > -- "with tab" > -- > -- >>> sanitize "line\nbreak" > -- "line break" > sanitize :: T.Text -> T.Text > sanitize = T.map (\c -> if c == '\r' || c == '\n' || c == '\t' then ' ' else c)