#!/usr/bin/perl
#######################################################
#
# Creates ArcaTrade dataset for analysis to test side
# inference method
#
# Dale W.R. Rosenthal
# $Id: CreateArcaTradeDataset.pl,v 1.1 2008/03/26 19:33:44 dale Exp $
#######################################################
#
# Usage: CreateArcaTradeDataset.pl -tradefile file
#
# Input:
#   - the trade file: comma-separated, first line is a header naming the
#     columns.  The columns read by this script are symbol, trade_date,
#     timestamp and everything listed in @FILL_FIELDS.
#   - R3000.2004.csv in the current directory: one symbol per line; only
#     trades whose symbol appears in it are written out.
#
# Output: comma-separated rows on STDOUT (header first); progress
# messages on STDERR.

use strict;
use warnings;
use Getopt::Long;

# Three-letter month abbreviation -> month number (1..12).
my @MONTHS = qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC);
my %MON2MM;
@MON2MM{@MONTHS} = 1 .. 12;    # hash *slice*: one key per month

# Trade-file columns copied verbatim to the output, in output order.
my @FILL_FIELDS = qw(volume price buy_sell
                     arca_bid_price arca_bid_volume
                     arca_ask_price arca_ask_volume
                     market_bid_price market_bid_volume
                     market_ask_price market_ask_volume
                     ab_order_id);

# Symbols reported with primary exchange "A".
my %IS_AMEX = map { $_ => 1 } qw(
    AFP AVD AVN AX BCP BHL BIO BL BMI CAC CAS CHC COI CPD CTO CUB DAR
    DFC DHB DHC END FIZ GRC GSX GTE GW HH HT HTC IMA IVX KFX LB LGN LNG
    MIX MLP MSS MWP NBY NHC NHR NVR OHB OMR PDC PGC PRK PRZ PSB RIV SEB
    SJW STB TBV TDS TIV TKO TMP TPY TWW USM WFD WSC);

# Multi-class symbols reported with primary exchange "N" even though
# several of them are four or more characters long.
my %IS_NYSE_MULTICLASS = map { $_ => 1 } qw(
    BF.A BF.B CRD.A CRD.B FCE.A FCE.B FSL FSL.B JW.A JW.B KV.A KV.B
    MOG.A MOG.B NWS NWSA VIA VIA.B SQA.A SQA.B TRX TRX.B TRY TRY.B);

# Run as a script; do nothing when the file is loaded with require/do so
# that the helper subs can be exercised on their own.
exit main() unless caller;

# Script entry point: parse options, load inputs, write the dataset.
# Returns the process exit status (0 on success).
sub main {
    my $trade_file;
    GetOptions("tradefile=s" => \$trade_file);
    die("usage: $0 -tradefile file\n") if !defined $trade_file;

    Log("Starting $0");

    LogLineStart("Reading in all trades for one day");
    my @trdfieldnames = OpenTradeFileAndReadHeader($trade_file);
    # OpenTradeFileAndReadHeader leaves TRDFILE open just past the header.
    my @tradelines = <TRDFILE>;
    close TRDFILE;
    chomp @tradelines;
    LogLineEnd();

    LogLineStart("Reading in Russell 3000 members for 2004H2-2005H1");
    open(my $russell_fh, '<', "R3000.2004.csv")
        or die "Cannot read in Russell 3000 names: $!";
    my @r3000_names = <$russell_fh>;
    close $russell_fh;
    chomp @r3000_names;
    my %r3000_map = ();
    @r3000_map{@r3000_names} = (1) x @r3000_names;
    LogLineEnd();

    ReportOutputHeader();

    # We require that these files are ordered (date, symbol, time).
    # So all observations for the earliest day come first, ordered
    # by symbol.  Within a symbol, observations are ordered by time.
    my $last_symbol;    # undef until the first data row has been seen
    foreach my $line (@tradelines) {
        next if $line =~ /^\s*$/;    # ignore blank lines

        # Limit -1 keeps trailing empty fields so columns stay aligned.
        my %fill;
        @fill{@trdfieldnames} = split /,/, $line, -1;

        my $symbol     = defined $fill{symbol}     ? $fill{symbol}     : '';
        my $trade_date = defined $fill{trade_date} ? $fill{trade_date} : '';
        my $timestamp  = defined $fill{timestamp}  ? $fill{timestamp}  : '';
        my $wanted     = exists $r3000_map{$symbol};

        # One progress line per run of identical symbols.
        if (!defined $last_symbol || $last_symbol ne $symbol) {
            LogLineEnd() if defined $last_symbol;
            my $action = $wanted ? "Processing" : "Skipping";
            LogLineStart("$action $symbol, $trade_date: ");
        }
        $last_symbol = $symbol;
        next if !$wanted;

        # trade_date is unpacked as 2-char day, 3-char month, 4-char year.
        my($dd, $mon, $ccyy) = unpack("a2a3a4", $trade_date);
        my $month_num = exists $MON2MM{uc $mon} ? $MON2MM{uc $mon} : 0;
        my $ccyymmdd  = sprintf("%s%02d%s", $ccyy, $month_num, $dd);

        # timestamp is hh:mm:ss; missing components count as zero.
        my($hh, $mm, $ss) = (split(/:/, $timestamp), 0, 0, 0)[0 .. 2];
        my $time_in_seconds = ($hh*60 + $mm)*60 + $ss;

        print(STDOUT join(",",
                          $ccyymmdd, $time_in_seconds, $symbol,
                          PrimaryExchange($symbol),
                          map { defined $_ ? $_ : '' } @fill{@FILL_FIELDS}),
              "\n");
    }
    LogLineEnd();

    Log("Stopping $0");
    return 0;
}

# Current local time formatted as "CCYYMMDD hh:mm:ss" for log lines.
sub _LogTimestamp {
    my($ss, $mm, $hh, $dd, $mo, $yr) = localtime();
    return sprintf("%4d%02d%02d %02d:%02d:%02d",
                   $yr+1900, $mo+1, $dd, $hh, $mm, $ss);
}

# Write a complete, timestamped message line to STDERR.
sub Log {
    my($message) = @_;
    print(STDERR _LogTimestamp(), ": $message\n");
}

# Start a timestamped STDERR line without terminating it; finish it
# with LogLineAppend and/or LogLineEnd.
sub LogLineStart {
    my($message) = @_;
    print(STDERR _LogTimestamp(), ": $message");
}

# Append text to the STDERR line begun by LogLineStart.
sub LogLineAppend {
    my($message) = @_;
    print(STDERR "$message");
}

# Terminate the current STDERR line.
sub LogLineEnd {
    print(STDERR "\n");
}

# Print the header row of the output dataset to STDOUT.
sub ReportOutputHeader {
    my $head = join(",", qw(date timeseconds symbol primexch
                            shares price buy_sell
                            arcabid arcabidsz arcaask arcaasksz
                            mktbid mktbidsz mktask mktasksz
                            abid));
    print STDOUT "$head\n";
}

# Open $trade_file on the package filehandle TRDFILE and consume its
# header line.  Returns the list of column names.  The handle is left
# open, positioned at the first data row, for the caller to read and
# close.  Logs and exits with status -1 if the file cannot be opened
# or has no header line.
sub OpenTradeFileAndReadHeader {
    my($trade_file) = @_;

    # Get header info from the trades file
    if (!open(TRDFILE, '<', $trade_file)) {
        Log("Cannot open trade file '$trade_file'!");
        exit -1;
    }
    my $trdheader = <TRDFILE>;
    if (!defined $trdheader) {
        Log("Trade file '$trade_file' has no header line!");
        exit -1;
    }
    chomp $trdheader;
    my @trdfieldnames = split /,/, $trdheader;
    return @trdfieldnames;
}

# Return the one-letter primary-exchange code for $symbol:
#   "A" if the symbol is in the AMEX list,
#   "N" if it is in the NYSE multi-class list,
#   "T" for any other symbol of four or more characters
#       (presumably NASDAQ -- confirm),
#   "N" for the remaining 1-3 character symbols.
# Symbols are compared by exact string equality.
sub PrimaryExchange {
    my($symbol) = @_;
    $symbol = '' if !defined $symbol;

    return "A" if exists $IS_AMEX{$symbol};
    return "N" if exists $IS_NYSE_MULTICLASS{$symbol};
    return "T" if length($symbol) >= 4;

    # only non-AMEX 1, 2, and 3-letter symbols left
    return "N";
}

# Market open time for $date as "h:mm:ss".
sub MarketOpen {
    my($date) = @_;
    # Currently, this is just a stub
    return "9:30:00";
}

# Market close time for $date as "hh:mm:ss".
sub MarketClose {
    my($date) = @_;
    # Currently, this is just a stub
    return "16:00:00";
}

# Convert seconds since midnight to "h:mm:ss.ssss": hours unpadded,
# minutes two digits, seconds zero-padded with four decimals.
sub Seconds2Time {
    my($seconds) = @_;
    my $hh = int($seconds / (60*60));
    $seconds -= $hh*60*60;
    my $mm = int($seconds / 60);
    $seconds -= $mm*60;
    # Width 7 = two integer digits + '.' + four decimals.
    return sprintf("%d:%02d:%07.4f", $hh, $mm, $seconds);
}

# Numeric minimum of the argument list (undef for an empty list).
sub min {
    my $min = $_[0];
    foreach my $val (@_) {
        $min = $val if $val < $min;
    }
    return $min;
}

# Numeric maximum of the argument list (undef for an empty list).
sub max {
    my $max = $_[0];
    foreach my $val (@_) {
        $max = $val if $val > $max;
    }
    return $max;
}

1;