Technical Musings: Running average in Erlang (escript)

Wednesday, April 27, 2011

Running average in Erlang (escript)

UPDATE: This script is now available as a gist: https://gist.github.com/dgulino/4750139

Another Python utility I created a while back, and usually use in conjunction with tgraph, is a running (or moving) average utility that reads its numbers from a pipe.  So, as another exercise, I created an Erlang/escript version.

The main use case is when I'm looking into a problem, and tailing a log.  Too much data.  Awk/perl/pyline the interesting numbers and tail that.  Numbers running too fast or changing too often to see a pattern.  So I then pipe the number stream through a running average and then to tgraph.

tail -f some.log | awk -F" " '{print $10}' | ./runavg.escript | ./tgraph.escript

Another similar case is to run a whole or portion of an existing log through runavg/tgraph to look for a pattern.  Make sure you set your terminal's buffer size to 2-3K before trying that.

tail -10000 some.log | awk -F" " '{print $10}' | ./runavg.escript -s 10 -i 10 | ./tgraph.escript -t 1000

runavg.escript
#!/usr/bin/env escript
%% -*- erlang -*-
%%! -smp disable
%% Author: Drew Gulino
-module(runavg).

-export([main/1]).

%% escript entry point.
%%
%% CmdLine is the list of command-line argument strings. Recognised options
%% are described by option_spec_list/0; the first non-option argument, if
%% any, names the input file (standard input is read otherwise).
%%
%% On an option parsing error the error and usage are printed and the script
%% exits with status 1 instead of continuing with an empty option list.
main(CmdLine) ->
    OptSpecList = option_spec_list(),
    case getopt:parse(OptSpecList, CmdLine) of
        {ok, {Options, NonOptArgs}} ->
            run(OptSpecList, Options, NonOptArgs);
        {error, {Reason, Data}} ->
            io:format("Error: ~s ~p~n~n", [Reason, Data]),
            version(),
            getopt:usage(OptSpecList, "runavg"),
            halt(1)
    end.

%% Dispatches on the informational flags (help, version) before averaging.
run(OptSpecList, Options, NonOptArgs) ->
    case {lists:member(help, Options), lists:member(version, Options)} of
        {true, _} ->
            getopt:usage(OptSpecList, "runavg");
        {false, true} ->
            version();
        {false, false} ->
            run_average(Options, NonOptArgs)
    end.

%% Validates the numeric options, opens the input and runs the averaging loop.
run_average(Options, NonOptArgs) ->
    SampleSize = get_opt_value(size, Options),
    OutputInterval = get_opt_value(interval, Options),
    case SampleSize >= 1 andalso OutputInterval >= 1 of
        false ->
            %% An interval below 1 would divide by zero on an empty window.
            io:format(standard_error,
                      "Error: size and interval must be positive integers~n", []),
            halt(1);
        true ->
            F = open_input(NonOptArgs),
            try
                proc_file(F, SampleSize, OutputInterval)
            after
                close_input(F)
            end
    end.

%% Returns the io device to read from: standard_io when no file was named,
%% otherwise the first non-option argument opened for reading.
open_input([]) ->
    standard_io;
open_input([Path | _]) ->
    case file:open(Path, [read]) of
        {ok, F} ->
            F;
        {error, Reason} ->
            io:format(standard_error, "Error: cannot open ~s: ~p~n", [Path, Reason]),
            halt(1)
    end.

%% Closes a device returned by open_input/1; standard_io is left alone.
close_input(standard_io) ->
    ok;
close_input(F) ->
    file:close(F).


%% Starts the averaging loop on device F with an empty sample window and
%% both counters (samples held, lines since last output) at zero.
proc_file(F, SampleSize, OutputInterval) ->
    EmptyWindow = [],
    SamplesHeld = 0,
    LinesSinceOutput = 0,
    proc_file(F, SampleSize, OutputInterval, EmptyWindow, SamplesHeld, LinesSinceOutput).

%% Main loop: reads one number per line from F and periodically prints the
%% average of the sample window with two decimals.
%%
%%   SampleAcc     - current window, newest sample first
%%   SampleCount   - number of samples in SampleAcc
%%   IntervalCount - samples read since the last average was printed
%%
%% Clause 1 prints the average once OutputInterval samples have been read.
%% Clause 2 makes room for the next sample once the window is full.
%% Clause 3 reads the next line; returns ok at end of input.
proc_file(F, SampleSize, OutputInterval, SampleAcc, SampleCount, IntervalCount)
  when IntervalCount >= OutputInterval ->
    io:format("~.2f~n", [lists:sum(SampleAcc) / erlang:length(SampleAcc)]),
    proc_file(F, SampleSize, OutputInterval, SampleAcc, SampleCount, 0);
proc_file(F, SampleSize, OutputInterval, [_ | _] = SampleAcc, SampleCount, IntervalCount)
  when SampleCount >= SampleSize ->
    %% New samples are prepended, so the OLDEST sample is the last element.
    %% Keep the first SampleCount - 1 entries to slide the window forward
    %% (dropping the head would discard the sample that was just read).
    Trimmed = lists:sublist(SampleAcc, SampleCount - 1),
    proc_file(F, SampleSize, OutputInterval, Trimmed, SampleCount - 1, IntervalCount);
proc_file(F, SampleSize, OutputInterval, SampleAcc, SampleCount, IntervalCount) ->
    case io:get_line(F, '') of
        eof ->
            ok;
        {error, Reason} ->
            %% Fail with a descriptive reason rather than a badarg further down.
            erlang:error({read_error, Reason});
        Line ->
            case strip_newlines(Line) of
                [] ->
                    %% Blank line: skip it instead of ending the stream.
                    proc_file(F, SampleSize, OutputInterval,
                              SampleAcc, SampleCount, IntervalCount);
                Stripped ->
                    Num = cast_to_integer(Stripped),
                    proc_file(F, SampleSize, OutputInterval,
                              [Num | SampleAcc], SampleCount + 1, IntervalCount + 1)
            end
    end.

%% Prints the program version banner on standard output.
version() ->
    Banner = "Version: 1.0\n",
    io:format(Banner, []).

%% Returns the value stored under Key in the parsed option list Options
%% (a list that may mix bare atoms and {Key, Value} tuples).
%%
%% The key is expected to be present; a missing key is a programming error
%% and fails the match rather than being reported to the caller.
get_opt_value(Key, Options) ->
    {Key, Value} = lists:keyfind(Key, 1, Options),
    Value.

%% Command-line option table in getopt format:
%% {Name, ShortOpt, LongOpt, ArgSpec, HelpMsg}.
option_spec_list() ->
    Help = {help, $h, "help", undefined, "Show the program options"},
    Version = {version, $v, "version", undefined, "Version"},
    Size = {size, $s, "size", {integer, 5}, "Size of average sample, Default=5"},
    Interval = {interval, $i, "interval", {integer, 5},
                "How many input entries before average is displayed, Default=5"},
    [Help, Version, Size, Interval].
       
%% Removes every carriage return and line feed from String, then strips
%% leading and trailing spaces. Returns a plain string (list).
%%
%% Matching the line-ending characters directly (rather than "(.*)[\n\r]")
%% ensures that CRLF-terminated input does not keep a stray "\r".
strip_newlines(String) ->
    NoLineEnds = re:replace(String, "[\r\n]+", "", [global, {return, list}]),
    string:strip(NoLineEnds).

%% Converts Input to an integer.
%%   integer          -> returned unchanged
%%   float            -> rounded to the nearest integer
%%   []               -> returned unchanged (empty list)
%%   non-empty string -> parsed as a float and rounded when it contains a
%%                       '.', otherwise parsed as an integer
cast_to_integer(Input) when is_integer(Input) ->
    Input;
cast_to_integer(Input) when is_float(Input) ->
    erlang:round(Input);
cast_to_integer([]) ->
    [];
cast_to_integer(Input) when is_list(Input) ->
    LooksLikeFloat = lists:member($., Input),
    if
        LooksLikeFloat ->
            Parsed = erlang:list_to_float(Input),
            erlang:round(Parsed);
        true ->
            erlang:list_to_integer(Input)
    end.

A util I use to test numeric pipes is a random number streamer:
randstream.escript:
#!/usr/bin/env escript
%% -*- erlang -*-
%%! -smp disable
%% Author: Drew Gulino
-module(randstream).

-export([main/1]).

%% escript entry point: streams random integers to standard output forever,
%% one per line. Command-line arguments are ignored.
%%
%% Uses the rand module, which seeds itself automatically on first use, in
%% place of the deprecated erlang:now/0 + random:seed/3 combination.
main(_) ->
    rand().

%% Prints one uniformly distributed integer in 1..100 and loops.
rand() ->
    Num = rand:uniform(100),
    io:format("~B~n", [Num]),
    rand().

./randstream.escript | ./runavg.escript -s 100 -i 100

should print a stream of numbers right around 50.5 (the mean of uniformly distributed integers from 1 to 100).

No comments: