Technical Musings: April 2011

Thursday, April 28, 2011

Compiling Erlang escript to .beam

As part of my experiments in command line Erlang escripts, I've been looking into compiling escripts into .beam files. There is a bit of a wait on starting my .escript files, and command line tools should have as little delay on startup as possible since they are generally started and stopped often.

First, I compiled my .escript files to .beam using this bash script:

erlsee.sh (get it?):
#!/usr/bin/env bash
# erlsee.sh - compile an escript straight to a .beam file.
#
# Usage: erlsee.sh FILE.escript
#
# erlc only accepts files ending in .erl and rejects the leading "#!" line,
# so this script writes FILE.erl (the escript minus its first line) next to
# the original and compiles that.  The "%%!" emulator-flags line is an
# ordinary Erlang comment and can stay.  The original .escript is never
# modified; FILE.erl is left in place as the compilable source.
set -eu

if [ "$#" -ne 1 ]; then
    echo "usage: $0 FILE.escript" >&2
    exit 64
fi

script=$1
case $script in
    *.escript) ;;
    *)
        echo "$0: expected a file ending in .escript, got: $script" >&2
        exit 64
        ;;
esac

# Replace only the trailing extension.  A sed pattern such as
# s/.escript/.erl/g treats "." as "any character" and rewrites every match
# anywhere in the path (e.g. "myescript.escript" -> "m.erl.erl").
erl_file=${script%.escript}.erl
echo "$script -> $erl_file"

# Everything from line 2 onwards, i.e. the script without its "#!" line.
tail -n +2 -- "$script" > "$erl_file"
/usr/bin/env erlc "$erl_file"

This allows you to compile an escript directly without having to remove the #! directives at the top or rename the file to have an .erl ending.

Secondly, I added the same #! headers to the .beam files, and set execution permissions (`chmod u+x *.beam`). Just open the .beam file with a text editor and paste the following lines at the top:

#!/usr/bin/env escript
%%! -smp disable


Third, I tested the running times with a very small input file with the 'time' utility. I figure this time should be dominated by the basic start up time, not the execution speeds.

$ time cat test.txt | ./runavg.escript -s 5 -i 5 | ./tgraph.escript
************************************************************************24
*********************************************15

real 0m2.077s
user 0m0.121s
sys 0m0.045s

$ time cat test.txt | ./runavg.beam -s 5 -i 5 | ./tgraph.beam
************************************************************************24
*********************************************15

real 0m2.182s
user 0m0.090s
sys 0m0.138s

I ran this multiple times, and the averages were almost equal; certainly not a big improvement. So it's not worth the extra effort to compile your escript files to .beam to improve startup times.

Execution time is another matter; it is probably improved, but that doesn't really matter for programs that just output text. They are fast enough as is.

Wednesday, April 27, 2011

Running average in Erlang (escript)

UPDATE: This script now on gist: https://gist.github.com/dgulino/4750139

Another python util I've created a while back, and usually use in conjunction with tgraph, is a running (or moving) average util that can also be piped into.  So, as another exercise, I created an Erlang/escript version.

The main use case is when I'm looking into a problem, and tailing a log.  Too much data.  Awk/perl/pyline the interesting numbers and tail that.  Numbers running too fast or changing too often to see a pattern.  So I then pipe the number stream through a running average and then to tgraph.

tail -f some.log | awk -F" " '{print $10}' | ./runavg.escript | ./tgraph.escript

Another similar case is to run a whole or portion of an existing log through runavg/tgraph to look for a pattern.  Make sure you set your terminal's buffer size to 2-3K before trying that.

tail -10000 some.log | awk -F" " '{print $10}' | ./runavg.escript -s 10 -i 10 | ./tgraph.escript -t 1000

runavg.escript
#!/usr/bin/env escript
%% -*- erlang -*-
%%! -smp disable
%% Author: Drew Gulino
-module(runavg).

-export([main/1]).

%% escript entry point.
%%
%% CmdLine is the list of command-line argument strings.  Options are parsed
%% with getopt against option_spec_list(); the first non-option argument, if
%% any, names the input file, otherwise numbers are read from standard input.
%%
%% On a parse error the error, version and usage are printed and the script
%% halts with status 1 (previously it carried on with an empty option list
%% and crashed inside get_opt_value/2).
main(CmdLine) ->
    OptSpecList = option_spec_list(),
    case getopt:parse(OptSpecList, CmdLine) of
        {ok, {Options, NonOptArgs}} ->
            run_with_options(Options, NonOptArgs, OptSpecList);
        {error, {Reason, Data}} ->
            io:format("Error: ~s ~p~n~n", [Reason, Data]),
            version(),
            getopt:usage(OptSpecList, "runavg"),
            halt(1)
    end.

%% Honour the help/version flags (getopt reports argument-less options as
%% bare atoms in Options), otherwise start averaging.
run_with_options(Options, NonOptArgs, OptSpecList) ->
    case {lists:member(help, Options), lists:member(version, Options)} of
        {true, _} ->
            getopt:usage(OptSpecList, "runavg");
        {false, true} ->
            version();
        {false, false} ->
            SampleSize = get_opt_value(size, Options),
            OutputInterval = get_opt_value(interval, Options),
            proc_file(open_input(NonOptArgs), SampleSize, OutputInterval)
    end.

%% Return the io device to read from: standard input when no file was named,
%% otherwise the first named file.  Any further file arguments are ignored.
open_input([]) ->
    standard_io;
open_input([Path | _]) ->
    case file:open(Path, [read]) of
        {ok, F} ->
            F;
        {error, Reason} ->
            io:format("Error: cannot open ~s: ~s~n", [Path, file:format_error(Reason)]),
            halt(1)
    end.


%% Read numbers line by line from io device F and print a running average.
%%
%% SampleSize     - how many of the most recent samples the average covers
%%                  (values below 1 are treated as 1).
%% OutputInterval - print the average after every OutputInterval samples.
%%
%% Returns ok at end of input, or {error, Reason} if reading fails.
proc_file(F, SampleSize, OutputInterval) ->
    proc_file(F, SampleSize, OutputInterval, [], 0, 0).

%% SampleAcc holds the current window, newest sample first; SampleCount is
%% its length; IntervalCount counts samples since the last printed average.
proc_file(F, SampleSize, OutputInterval, SampleAcc, SampleCount, IntervalCount) ->
    case io:get_line(F, '') of
        eof ->
            ok;
        {error, _} = Error ->
            Error;
        Line ->
            case cast_to_integer(strip_newlines(Line)) of
                [] ->
                    %% Blank or whitespace-only line: skip it instead of
                    %% ending the stream or poisoning the window.
                    proc_file(F, SampleSize, OutputInterval,
                              SampleAcc, SampleCount, IntervalCount);
                Num ->
                    Capacity = max(SampleSize, 1),
                    %% The list is newest-first, so keeping the first
                    %% Capacity elements discards the OLDEST sample.
                    Window = lists:sublist([Num | SampleAcc], Capacity),
                    Count = min(SampleCount + 1, Capacity),
                    Pending = report_average(Window, Count,
                                             IntervalCount + 1, OutputInterval),
                    proc_file(F, SampleSize, OutputInterval,
                              Window, Count, Pending)
            end
    end.

%% Print the window average once enough samples have arrived since the last
%% report.  Returns the new interval counter (0 after printing).
report_average(Window, Count, IntervalCount, OutputInterval)
  when IntervalCount >= OutputInterval ->
    io:format("~.2f~n", [lists:sum(Window) / Count]),
    0;
report_average(_Window, _Count, IntervalCount, _OutputInterval) ->
    IntervalCount.

%% Print the tool's version banner on standard output.
version() ->
    Banner = "Version: 1.0\n",
    io:format(Banner, []).

%% Look up the value getopt parsed for option Key.
%%
%% Options is the option list returned by getopt:parse/2; it may contain
%% bare atoms (flags without arguments), which lists:keyfind/3 skips.
%% Returns the value, or raises error({missing_option, Key}) when the
%% option is absent.
get_opt_value(Key, Options) ->
    case lists:keyfind(Key, 1, Options) of
        {Key, Value} ->
            Value;
        false ->
            erlang:error({missing_option, Key})
    end.

%% getopt option specifications for runavg.
%% Each entry is {Name, ShortOpt, LongOpt, ArgSpec, HelpMsg}.
option_spec_list() ->
    Help = {help, $h, "help", undefined, "Show the program options"},
    Version = {version, $v, "version", undefined, "Version"},
    Size = {size, $s, "size", {integer, 5}, "Size of average sample, Default=5"},
    Interval = {interval, $i, "interval", {integer, 5},
                "How many input entries before average is displayed, Default=5"},
    [Help, Version, Size, Interval].
       
%% Remove every carriage return and line feed from String, then strip
%% leading and trailing spaces.  Returns a flat string.
%%
%% The previous pattern "(.*)[\n\r]" let the greedy group swallow the "\r"
%% of a CRLF line ending, so "12\r\n" came back as "12\r".
strip_newlines(String) ->
    WithoutBreaks = re:replace(String, "[\r\n]", "", [global, {return, list}]),
    string:strip(WithoutBreaks).

%% Convert an input value to an integer.
%%   []      -> [] (empty input is passed through unchanged)
%%   integer -> returned as is
%%   float   -> rounded to the nearest integer
%%   string  -> parsed as a float and rounded when it contains a '.',
%%              otherwise parsed as an integer
cast_to_integer([]) ->
    [];
cast_to_integer(Whole) when is_integer(Whole) ->
    Whole;
cast_to_integer(Fraction) when is_float(Fraction) ->
    erlang:round(Fraction);
cast_to_integer(Text) when is_list(Text) ->
    text_to_integer(lists:member($., Text), Text).

%% Parse Text according to whether it contains a decimal point.
text_to_integer(true, Text) ->
    erlang:round(erlang:list_to_float(Text));
text_to_integer(false, Text) ->
    erlang:list_to_integer(Text).

A util I use to test numeric pipes is a random number streamer:
randstream.escript:
#!/usr/bin/env escript
%% -*- erlang -*-
%%! -smp disable
%% Author: Drew Gulino
-module(randstream).

-export([main/1]).

%% escript entry point: stream random integers until the process is killed.
%% The command-line arguments are ignored.
%%
%% No explicit seeding is needed: the rand module seeds itself with a
%% non-constant seed on first use in a process.  This replaces the
%% deprecated now/0 + random:seed/3 combination.
main(_) ->
    rand().

%% Print one integer drawn uniformly from 1..100 per line, forever.
rand() ->
    Num = rand:uniform(100),
    io:format("~B~n", [Num]),
    rand().

./randstream.escript | ./runavg.escript -s 100 -i 100

should return a list of numbers right around 50 (uniform(100) yields integers from 1 to 100, so the expected average is 50.5).

Monday, April 25, 2011

Console plotting in Erlang

UPDATE: This script is now on gist: https://gist.github.com/dgulino/4750139

Years ago I created a simple Python script to plot a list of numbers in a simple ASCII line graph. I use this script all the time; I extract (with awk, perl, pyliner, ...) a column of numbers out of a log file and pipe it into this script. Very useful, since even today most admin work is done in a character-based terminal.

UPDATE (4/28/2011): Also check my other terminal console escript entries: runavg.escript, escript to beam

All numbers are rounded to an integer.  It auto resizes new graph entries when a new max is set (default max is 0), and plots a new max line in bold.  If you set a threshold, it outputs any line over that threshold in red.  Or both.

I created an Erlang escript version of it, just as an exercise. It requires 'tput' to be in the path; this even works with the cygwin version. It also uses an Erlang version of getopt. I didn't bother to install it; I just copied getopt.erl to my src dir and compiled it. Also remember to chmod u+x tgraph.escript.

It's called as part of a pipe:
$ cat test.txt | ./tgraph.escript -t 40 -c 40

or with the file as a parameter:
./tgraph.escript test.txt -t 40 -c 40

test.txt:
1
50
20
3
45
34.0
12
0
1000
0
100
34


Output:

****************************************1
****************************************50
****************20
**3
************************************45
***************************34
**********12
0
****************************************1000
0
****100
*34


tgraph.escript:
#!/usr/bin/env escript
%% -*- erlang -*-
%%! -smp disable
%% Author: Drew Gulino
-module(tgraph).

-export([main/1]).

%% escript entry point.
%%
%% CmdLine is the list of command-line argument strings.  Options are parsed
%% with getopt against option_spec_list(); the first non-option argument, if
%% any, names the input file, otherwise numbers are read from standard input.
%%
%% On a parse error the error, version and usage are printed and the script
%% halts with status 1 (previously it carried on with an empty option list
%% and crashed inside get_opt_value/2).
main(CmdLine) ->
    OptSpecList = option_spec_list(),
    case getopt:parse(OptSpecList, CmdLine) of
        {ok, {Options, NonOptArgs}} ->
            dispatch_options(Options, NonOptArgs, OptSpecList);
        {error, {Reason, Data}} ->
            io:format("Error: ~s ~p~n~n", [Reason, Data]),
            version(),
            getopt:usage(OptSpecList, "tgraph"),
            halt(1)
    end.

%% Honour the help/version flags (getopt reports argument-less options as
%% bare atoms in Options), otherwise start plotting.
dispatch_options(Options, NonOptArgs, OptSpecList) ->
    case {lists:member(help, Options), lists:member(version, Options)} of
        {true, _} ->
            getopt:usage(OptSpecList, "tgraph");
        {false, true} ->
            version();
        {false, false} ->
            plot_input(Options, NonOptArgs)
    end.

%% Collect the plot settings and terminal control sequences once, up front,
%% then hand the input device to proc_file/3.
plot_input(Options, NonOptArgs) ->
    Symbol = get_opt_value(symbol, Options),
    Columns = get_opt_value(columns, Options),
    Display_number = get_opt_value(display_number, Options),
    Threshold = get_opt_value(threshold, Options),
    Maximum = get_opt_value(maximum, Options),

    %% tput is queried once here rather than per line; the commands are
    %% fixed strings, so no user input reaches the shell.
    Bold = strip_newlines(os:cmd("tput bold")),
    Init = strip_newlines(os:cmd("tput init")),
    Dim = strip_newlines(os:cmd("tput sgr0")),
    Red = strip_newlines(os:cmd("tput setaf 1")),

    F = open_source(NonOptArgs),
    proc_file(F, {Symbol, Columns, Display_number, Threshold, Maximum},
              {Bold, Init, Dim, Red}).

%% Return the io device to read from: standard input when no file was named,
%% otherwise the first named file.  Any further file arguments are ignored.
open_source([]) ->
    standard_io;
open_source([Path | _]) ->
    case file:open(Path, [read]) of
        {ok, F} ->
            F;
        {error, Reason} ->
            io:format("Error: cannot open ~s: ~s~n", [Path, file:format_error(Reason)]),
            halt(1)
    end.

%% Print the tool's version banner on standard output.
version() ->
    Banner = "Version: 1.1\n",
    io:format(Banner, []).

%% Look up the value getopt parsed for option Key.
%%
%% Options is the option list returned by getopt:parse/2; it may contain
%% bare atoms (flags without arguments), which lists:keyfind/3 skips.
%% Returns the value, or raises error({missing_option, Key}) when the
%% option is absent.
get_opt_value(Key, Options) ->
    case lists:keyfind(Key, 1, Options) of
        {Key, Value} ->
            Value;
        false ->
            erlang:error({missing_option, Key})
    end.

%% getopt option specifications for tgraph.
%% Each entry is {Name, ShortOpt, LongOpt, ArgSpec, HelpMsg}.
option_spec_list() ->
    Flags = [
        {help, $h, "help", undefined, "Show the program options"},
        {version, $v, "version", undefined, "Version"}
    ],
    Display = [
        {display_number, $n, "display_number", {boolean, true}, "Display number w/graph"},
        {columns, $c, "columns", {integer, 72}, "Display columns (default = 72)"},
        {symbol, $s, "symbol", {string, "*"}, "Symbol to display (default = '*')"}
    ],
    Scaling = [
        {threshold, $t, "threshold", {integer, 0}, "Will color lines over this value"},
        {maximum, $m, "maximum", {integer, 0}, "Presets the scale for this maximum value (default = 0)"}
    ],
    Flags ++ Display ++ Scaling.

%% Read numbers line by line from io device F and draw one bar per number.
%%
%% Options - {Symbol, Columns, Display_number, Threshold, Maximum}
%% Tput    - {Bold, Init, Dim, Red} terminal control sequences
%%
%% Blank and whitespace-only lines are skipped (previously the first blank
%% line ended the whole run).  Returns ok at end of input, or
%% {error, Reason} if reading fails.
proc_file(F, Options, Tput) ->
    case io:get_line(F, '') of
        eof ->
            ok;
        {error, _} = Error ->
            Error;
        Line ->
            case cast_to_integer(strip_newlines(Line)) of
                [] ->
                    proc_file(F, Options, Tput);
                Num ->
                    NewOptions = plot_sample(Num, Options, Tput),
                    proc_file(F, NewOptions, Tput)
            end
    end.

%% Draw one line for Num and return the options for the next line; the
%% Maximum field is raised when Num sets a new maximum.
plot_sample(Num, {Symbol, Columns, Display_number, Threshold, Maximum},
            {Bold, Init, Dim, Red}) when Num > 0 ->
    io:put_chars(Init),
    %% A value that reaches the current maximum rescales the graph and is
    %% drawn in bold; everything else is drawn with attributes reset.
    NewMax = case Num >= Maximum of
                 true ->
                     io:put_chars(Bold),
                     Num;
                 false ->
                     io:put_chars(Dim),
                     Maximum
             end,
    Scale = Columns / NewMax,
    %% Repeat the symbol verbatim.  Passing it to io_lib:format/2 as a
    %% format string crashed on symbols containing "~".
    Graph = lists:duplicate(erlang:round(Num * Scale), Symbol),
    %% A threshold of 0 means "no threshold".
    case Threshold =/= 0 andalso Num >= Threshold of
        true -> io:put_chars(Red);
        false -> ok
    end,
    case Display_number of
        true ->
            io:format("~s~p~n", [Graph, Num]);
        false ->
            %% ~s, not ~p: the bar must print as characters rather than as
            %% a quoted Erlang list.
            io:format("~s~n", [Graph])
    end,
    {Symbol, Columns, Display_number, Threshold, NewMax};
plot_sample(Num, Options, {_Bold, Init, Dim, _Red}) ->
    %% Zero or negative: no bar, just the number with attributes reset.
    io:put_chars(Init),
    io:put_chars(Dim),
    io:put_chars(Init),
    io:format("~p~n", [Num]),
    Options.

%% Remove every carriage return and line feed from String, then strip
%% leading and trailing spaces.  Returns a flat string.
%%
%% The previous pattern "(.*)[\n\r]" let the greedy group swallow the "\r"
%% of a CRLF line ending, so "12\r\n" came back as "12\r".
strip_newlines(String) ->
    WithoutBreaks = re:replace(String, "[\r\n]", "", [global, {return, list}]),
    string:strip(WithoutBreaks).

%% Convert an input value to an integer: [] is returned unchanged, integers
%% pass through, floats are rounded, and strings are parsed (as a float to
%% be rounded when they contain a '.', as an integer otherwise).
cast_to_integer([]) ->
    [];
cast_to_integer(Input) when is_integer(Input) ->
    Input;
cast_to_integer(Input) when is_float(Input) ->
    erlang:round(Input);
cast_to_integer(Input) when is_list(Input) ->
    HasDecimalPoint = lists:member($., Input),
    if
        HasDecimalPoint ->
            Parsed = erlang:list_to_float(Input),
            erlang:round(Parsed);
        true ->
            erlang:list_to_integer(Input)
    end.

UPDATE (4/26/2011):
Here's a link to the original python script: tgraph.py

UPDATE (4/27/2011):
Version 1.1:
1) Fixed bug where lines were never dimmed after being bolded in cygwin
2) Changed the compiler flags to disable smp and not register the process name.  Both not needed.  One note: Couldn't get +Bc to work in Windows.  This should change the break key to Ctrl-C, but still stays Ctrl-Break.
3) Moved tput initialization out of working loop, now runs quickly.

Wednesday, April 20, 2011

iOS consolidated.db workaround for hacked devices

Looks like Apple is tracking iOS devices and recording that info in clear text:
http://radar.oreilly.com/2011/04/apple-location-tracking.html

Here's a way to ensure this data is not recorded:
 
You must have a hacked iOS device, and either Mobile Terminal or an SSH login.  You must also know the root password.  You first remove/move this file, and recreate it as a symbolic link to /dev/null like:
 
su
cd /System/Library/Frameworks/CoreLocation.framework/Support
rm consolidated.db
ln -s /dev/null consolidated.db
 
Anything written to this 'file' is sent to /dev/null, so it is not saved on the file system.  I've done this on a hacked device, and Location Services continue to work.

Sunday, April 17, 2011

90th percentile done wrong

I'm no statistician, but I do load test a lot of systems and report upon them.

It's common practice to report upon a particular measure using the average of the 90th percentile of the data (throwing out the slowest 10%): response times, throughput, etc. This is done to remove the outliers; the 2 days response time for a call that normally takes 100ms.

I got to thinking; if the slowest 10% is obviously wrong, why isn't the fastest 10%? Seems just taking the slow outliers is cheating.

I think a good compromise between simplicity and accuracy would be to throw out the slowest 5% and the fastest 5%.

A statistician would know better I'm sure. But explaining 90th percentile to your boss/customer is generally hard enough.

Friday, April 15, 2011

Erlang Hot Code Swapping - Interfaces

I read Raymond Tay's 2007 post Erlang Hot Code Swapping a while back, and realized it was not quite right, or at least not complete. The idea and code behind code swapping works, but the execution of the code in the blog didn't actually demonstrate that fact. He changes the calling functions along with swapping the code; you shouldn't have to do that.

Which brings me to interfaces. EJBs (what he was trying to emulate) are all about interfaces. You specifically have to have separate files for the interface and the implementation. In fact, EJB 1.0 was way too heavy with the interfaces, and it was a pain to work with.

But the general idea of an interface is awesome. USB anyone? Back in the bad old days there was the RS-232 serial interface, which was woefully under-standardized, so there was no guarantee that plugging two RS-232 devices together would work (usually the opposite). You'd spend a lot of time trying different configurations to get things to work (Remember 9600-N-8-1?).

Interfaces dictate a list of methods (or functions) that can always be expected to be implemented by a piece of code. This is great for long running code that must work with other systems. The implementation changes over time, but the developer knows if he/she (ok, most likely he, but only statistically) is going to change the interface, they will break compatibility with other systems.

The neat thing is that the interface is already defined in Erlang code that implements callbacks; it is just the set of exported functions that will make the callbacks. But the Java idea of an Interface is a list of methods that is separate from the implementation and can be shared across implementations. Interfaces aren't a language feature of Erlang, but OTP behaviours are Interfaces (among other things): they require a list of exported functions.

What's the best way to implement an interface in Erlang without OTP? The best way I've figured is this: Move the public functions from the private callback functions into separate modules, and then import the public functions. This way two implementations that import the interface module will have to share the same interface.

First, the generic container code:

container.erl:
-module(container).
-export([start/1, rpc/2, swap_code/1]).

-include("callback.hrl").

%% Spawn the server loop with callback module Mod and register it under
%% ?SERVERNAME.  The initial state comes from Mod:init/0, evaluated inside
%% the new process.  Returns the result of register/2.
start(Mod) ->
    Server = spawn(fun() ->
                       InitialState = Mod:init(),
                       loop(?SERVERNAME, Mod, InitialState)
                   end),
    register(?SERVERNAME, Server).

%% Ask the running server to use Mod as its callback module from now on.
%% Returns the server's reply.
swap_code(Mod) ->
    SwapRequest = {swap_code, Mod},
    rpc(?SERVERNAME, SwapRequest).

%%
%% Standard code for abstracting the "RPC-call" layer.
%%
%% Sends {self(), Request} to the process registered as Name and waits for
%% its {Name, Response} reply, returning Response.
%%
%% The server is monitored while waiting.  If it dies before replying (for
%% example because the callback crashed on the request) the caller exits
%% with {server_down, Name, Reason} instead of blocking forever.  As before,
%% sending to an unregistered Name raises badarg.
%%
rpc(Name, Request) ->
    Name ! {self(), Request},
    MRef = erlang:monitor(process, Name),
    receive
        {Name, Response} ->
            %% flush removes a 'DOWN' message that may already be queued.
            erlang:demonitor(MRef, [flush]),
            Response;
        {'DOWN', MRef, process, _Server, Reason} ->
            erlang:exit({server_down, Name, Reason})
    end.

%%
%% Server loop: wait for {From, Request} messages from clients, answer each
%% with {Name, Reply}, and carry on with the (possibly new) callback module
%% and state.
%%
loop(Name, Mod, OldState) ->
    receive
        {From, Request} ->
            {Reply, NextMod, NextState} = serve(Request, Mod, OldState),
            From ! {Name, Reply},
            loop(Name, NextMod, NextState)
    end.

%% Work out the reply, callback module and state that follow one request.
%% A swap_code request is acknowledged and switches the module while keeping
%% the state; anything else is delegated to Mod:handle/2.
serve({swap_code, NewCallbackMod}, _Mod, State) ->
    {ack, NewCallbackMod, State};
serve(Request, Mod, State) ->
    {Response, NewState} = Mod:handle(Request, State),
    {Response, Mod, NewState}.
The registered server name is stored in an .hrl file included in the container and callback code:

callback.hrl:
-define(SERVERNAME, moneyserver).
callback.erl:
-module(callback).
-export([dollarToYen/1, yenToEuro/1]).
-include("callback.hrl").

-import(container, [rpc/2]).

%% client routines

%% Send a convertToYen request for Dollars to the registered server and
%% return its reply.
dollarToYen(Dollars) ->
    Request = {convertToYen, Dollars},
    container:rpc(?SERVERNAME, Request).
%% Send a convertToEuro request for Yen to the registered server and return
%% its reply.
yenToEuro(Yen) ->
    Request = {convertToEuro, Yen},
    container:rpc(?SERVERNAME, Request).
callback_impl.erl:
-module(callback_impl).
-export([init/0, handle/2]).
-import(container, [rpc/2]).
%% client routines
-import(callback, [dollarToYen/1,yenToEuro/1]).

%% callback routines

%% Build the initial server state: an empty dict.
init() ->
    EmptyState = dict:new(),
    EmptyState.

%% Handle one request.  Returns {Response, NewState}; the state is passed
%% through unchanged by both conversions.
handle({convertToYen, Dollars}, Dict) ->
    InYen = Dollars * 126,
    {InYen, Dict};
handle({convertToEuro, Yen}, Dict) ->
    InEuro = Yen * 0.0077,
    {InEuro, Dict}.

callback_impl2.erl:
-module(callback_impl2).
-export([init/0, handle/2]).
-import(container, [rpc/2]).
%% client routines
-import(callback, [dollarToYen/1,yenToEuro/1]).

%% callback routines

%% Build the initial server state: an empty dict.
init() ->
    FreshState = dict:new(),
    FreshState.

%% Handle one request with this implementation's rates (each factor applied
%% twice).  Returns {Response, NewState}; the state is passed through
%% unchanged.
handle({convertToYen, Dollars}, Dict) ->
    Once = Dollars * 126,
    Twice = Once * 126,
    {Twice, Dict};
handle({convertToEuro, Yen}, Dict) ->
    Once = Yen * 0.0077,
    Twice = Once * 0.0077,
    {Twice, Dict}.

How this works:

RUNTIME:

> c(container).
> c(callback).
> c(callback_impl).
> c(callback_impl2).

% our first implementation:
> container:start(callback_impl).
> callback:dollarToYen(1).
126
> callback:yenToEuro(1).
0.0077

> container:swap_code(callback_impl2).
ack
> callback:yenToEuro(1).
5.929e-5
> callback:dollarToYen(1).
15876

Same interface, same exact call, but hot swapped implementations!