#!/usr/bin/perl ####################################################### # # Creates streamlined quotes file for estimating the # prevailing quote. # # Dale W.R. Rosenthal # $Id: CreateStreamlinedQuotesFile.pl,v 1.2 2008/04/06 23:03:06 dale Exp $ ####################################################### use strict; use Getopt::Long; my($trade_file, $quote_file); my $opts = { "tradefile=s" => \$trade_file, "quotefile=s" => \$quote_file }; GetOptions(%$opts); die("usage: $0 -tradefile file -quotefile file\n") if !defined $trade_file or !defined $quote_file; my @months = qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC); my @month_nums = 1..12; my %mon2mm = (); @mon2mm{@months} = @month_nums; Log("Starting $0"); LogLineStart("Reading in R3000 names (universe of interest)..."); my $r3000_sectors = ReadInUniverseAndSectors("sectors.full.csv"); LogLineAppend("done."); LogLineEnd(); LogLineStart("Reading in trades..."); my $trades = GetTrades($trade_file); LogLineAppend("done."); LogLineEnd(); ReportOutputHeader(); my @qtfieldnames = OpenQuoteFileAndReadHeader($quote_file); my $done = 0; my $quote_cache = []; while (!$done) { # get quotes for next symbol in quote file (and our universe) # and save quote for following symbol in quote cache my $quotes; ($quotes, $done) = GetNextSymbolQuotes(\@qtfieldnames, $r3000_sectors, $quote_cache); if (@$quotes == 0) { LogLineAppend("done."); LogLineEnd(); next; } my $symbol = $quotes->[0]->{symbol}; # ignore quotes if no trades if (!exists $trades->{$symbol}) { LogLineAppend("no trades! done."); LogLineEnd(); next; } # iterate through time-ordered trades, finding and writing # quotes within previous-90-seconds window my $max_idx = scalar(@$quotes) - 1; my $qtstart_idx = 0; my $qtend_idx = 0; foreach my $t (@{$trades->{$symbol}}) { my $market = PrimaryExchange($symbol); my $tdate = $t->{trade_date}; my($dd, $mon, $ccyy) = unpack("a2a3a4", $tdate); my $ccyymmdd = sprintf("$ccyy%02d$dd", $mon2mm{$mon}); my($hh, $mm, $ss) = split /:/, $t->{timestamp}; my $ttime_seconds = $hh*60*60 + $mm*60 + $ss; $qtstart_idx = QuoteSearch($quotes, $ttime_seconds, $qtstart_idx, $max_idx); $qtend_idx = QuoteSearch($quotes, $ttime_seconds - 90, $qtend_idx, $max_idx); my(@bids, @asks, @bidtimes, @asktimes); my($prev_bid, $prev_ask); for (my $i = $qtend_idx; $i <= $qtstart_idx; $i++) { my $q = $quotes->[$i]; if ($q->{insd_bid_pr} != $prev_bid) { push @bids, $q->{insd_bid_pr}; push @bidtimes, sprintf("%0.4f", $ttime_seconds - $q->{timeseconds}); $prev_bid = $q->{insd_bid_pr}; } if ($q->{insd_ask_pr} != $prev_ask) { push @asks, $q->{insd_ask_pr}; push @asktimes, sprintf("%0.4f", $ttime_seconds - $q->{timeseconds}); $prev_ask = $q->{insd_ask_pr}; } } print(STDOUT "$symbol,$market,$ccyymmdd,$ttime_seconds,", scalar(@bids), ",BIDDELAYS,", join(",", @bidtimes), ",BIDS,", join(",", @bids), ",", scalar(@asks), ",ASKDELAYS,", join(",", @asktimes), ",ASKS,", join(",", @asks), "\n"); } LogLineAppend("output done."); LogLineEnd(); } close QTFILE; close STDOUT; Log("Stopping $0"); exit(); sub Log { my($message) = @_; my($ss, $mm, $hh, $dd, $mo, $yr) = localtime(); my $timestamp = sprintf("%4d%02d%02d %02d:%02d:%02d", $yr+1900, $mo+1, $dd, $hh, $mm, $ss); print(STDERR "$timestamp: $message\n"); } sub LogLineStart { my($message) = @_; my($ss, $mm, $hh, $dd, $mo, $yr) = localtime(); my $timestamp = sprintf("%4d%02d%02d %02d:%02d:%02d", $yr+1900, $mo+1, $dd, $hh, $mm, $ss); print(STDERR "$timestamp: $message"); } sub LogLineAppend { my($message) = @_; print(STDERR "$message"); } sub LogLineEnd { print(STDERR "\n"); } sub ReadInUniverseAndSectors { my($universe_filename) = @_; my $universe_map = {}; open(UNIVERSEFILE, $universe_filename) or die "Cannot read in universe (from '$universe_filename')!"; my $headerline = ; chomp $headerline; my(@universe_lines) = ; close UNIVERSEFILE; chomp @universe_lines; foreach my $ul (@universe_lines) { my($symbol, $sector, $industry) = split /,/, $ul; $universe_map->{$symbol} = $sector; } return $universe_map; } sub GetTrades { my($trade_file) = @_; open(TRADEFILE, $trade_file) or die "Cannot read in trades!"; my $headerline = ; chomp $headerline; my(@tfieldnames) = split /,/, $headerline; my(@tradelines) = ; close TRADEFILE; chomp @tradelines; LogLineAppend("done."); LogLineEnd(); LogLineStart("Creating trades data structure..."); my @resolution_queue = (); my $trade_list = {}; foreach my $tl (@tradelines) { my @tfields = split /,/, $tl; my $t = {}; @$t{@tfieldnames} = @tfields; if (@resolution_queue > 0 and $t->{symbol} eq $resolution_queue[0]->{symbol} and $t->{trade_date} eq $resolution_queue[0]->{trade_date} and $t->{timestamp} eq $resolution_queue[0]->{timestamp}) { push @resolution_queue, $t; next; } # now handle same-second trades my $ntrades = scalar @resolution_queue; my $i = 1; if (@resolution_queue > 0) { while (my $sst = shift(@resolution_queue)) { my $frac_secs = unpack("xa*", sprintf("%.7f", $i/($ntrades+1))); $sst->{timestamp} .= $frac_secs; push @{$trade_list->{$sst->{symbol}}}, $sst; $i++; } } push @resolution_queue, $t; } # clean out the queue my $ntrades = scalar @resolution_queue; my $i = 1; while (my $sst = shift @resolution_queue) { $sst->{timestamp} .= sprintf(".%.7f", $i/($ntrades+1)); push @{$trade_list->{$sst->{symbol}}}, $sst; $i++; } return $trade_list; } sub ReportOutputHeader { my $head = join(",", qw(symbol market date timeseconds numbids BIDDELAYS biddelaysvector BIDS bidsvector numasks ASKDELAYS askdelaysvector ASKS asksvector)); print STDOUT "$head\n"; } sub OpenQuoteFileAndReadHeader { my($quote_file) = @_; # Get header info from the quotes file if (!open(QTFILE, $quote_file)) { Log("Cannot open quote file '$quote_file'!"); exit -1; } my $qtheader = ; chomp $qtheader; my @qtfieldnames = split /,/, $qtheader; return @qtfieldnames; } sub GetNextSymbolQuotes { my($qtfieldnames, $r3000_map, $samesecond_quotes) = @_; ### A small confiteor here: There is a logic problem in the loop ### below. If a symbol has only one quote in a given day, I never ### log that I process it. I do process it, I just don't say so. ### It's not a serious error -- certainly not serious enough to ### warrant messing with working code. Nonetheless, it is an ### error. Mea culpa. Mea maxima culpa. # initialize quote structure and logging vars my $done = 0; my $lastsymbol = ""; my $first_iter = 1; my $ordered_quote_list = []; while (my $lyne = ) { chomp $lyne; my(@fieldz) = split /,/, $lyne; my $qt = {}; @$qt{@$qtfieldnames} = @fieldz; # Handle symbol transitions (logging and data structures) if ($first_iter) { if (exists $r3000_map->{$qt->{symbol}}) { LogLineStart("Processing $qt->{symbol}, $qt->{q_date}: "); } else { LogLineStart("Skipping $qt->{symbol}, $qt->{q_date}: "); } $first_iter = 0; $lastsymbol = $qt->{symbol}; } if (@$samesecond_quotes > 0) { # handle inserting quote in (already extant) list # of same-second quotes if($qt->{symbol} eq $samesecond_quotes->[0]->{symbol} and $qt->{q_date} eq $samesecond_quotes->[0]->{q_date} and $qt->{q_time} eq $samesecond_quotes->[0]->{q_time}) { push @$samesecond_quotes, $qt; next; } # print out complete list of same-second quotes my $i = 1; my($hh, $mm, $ss) = split /:/, $samesecond_quotes->[0]->{q_time}; my $num_ss_quotes = scalar @$samesecond_quotes; while (my $quote = shift @$samesecond_quotes) { if ($quote->{q_condition} eq "") { $i++; next; } my $time_in_seconds = ($hh*60 + $mm)*60 + $ss + $i/($num_ss_quotes+1); $quote->{timeseconds} = $time_in_seconds; push @$ordered_quote_list, $quote; $i++; } } # and start a new list of same-second quotes if the # quote is for a symbol in our universe of interest push @$samesecond_quotes, $qt if exists $r3000_map->{$qt->{symbol}}; # And jump outside this procedure when we hit a new symbol if ($qt->{symbol} ne $lastsymbol) { LogLineAppend("quotes "); return ($ordered_quote_list, $done); } } # clean out list of same-second quotes; note that we have # consumed the quote file $done = 1; my $i = 1; my($dd, $mon, $ccyy) = unpack("a2a3a4", $samesecond_quotes->[0]->{q_date}); my $ccyymmdd = sprintf("$ccyy%02d$dd", $mon2mm{$mon}); my($hh, $mm, $ss) = split /:/, $samesecond_quotes->[0]->{q_time}; my $num_ss_quotes = scalar @$samesecond_quotes; while (my $quote = shift @$samesecond_quotes) { if ($quote->{q_condition} eq "") { $i++; next; } my $time_in_seconds = ($hh*60 + $mm)*60 + $ss + $i/($num_ss_quotes+1); $quote->{timeseconds} = $time_in_seconds; push @$ordered_quote_list, $quote; $i++; } return ($ordered_quote_list, $done); } # Find quote prevailing at $time2find sub QuoteSearch { my($quotes, $time2find, $start_idx, $end_idx) = @_; return $start_idx if $start_idx == $end_idx; return 0 if $quotes->[$start_idx]->{timeseconds} > $time2find; return $end_idx if $quotes->[$end_idx]->{timeseconds} <= $time2find; return $start_idx if ($quotes->[$start_idx]->{timeseconds} <= $time2find and $end_idx -$start_idx == 1); my $split_idx = int(($start_idx + $end_idx)/2); if ($quotes->[$split_idx]->{timeseconds} <= $time2find) { return QuoteSearch($quotes, $time2find, $split_idx, $end_idx); } else { return QuoteSearch($quotes, $time2find, $start_idx, $split_idx); } } sub PrimaryExchange { my($symbol) = @_; my @amex_names = qw(AFP AVD AVN AX BCP BHL BIO BL BMI CAC CAS CHC COI CPD CTO CUB DAR DFC DHB DHC END FIZ GRC GSX GTE GW HH HT HTC IMA IVX KFX LB LGN LNG MIX MLP MSS MWP NBY NHC NHR NVR OHB OMR PDC PGC PRK PRZ PSB RIV SEB SJW STB TBV TDS TIV TKO TMP TPY TWW USM WFD WSC); my @nyse_multiclass = qw(BF.A BF.B CRD.A CRD.B FCE.A FCE.B FSL FSL.B JW.A JW.B KV.A KV.B MOG.A MOG.B NWS NWSA VIA VIA.B SQA.A SQA.B TRX TRX.B TRY TRY.B); my $primary_exchange; if (grep /^$symbol$/, @amex_names) { $primary_exchange = "A"; } elsif (grep /^$symbol$/, @nyse_multiclass) { $primary_exchange = "N"; } elsif (length($symbol) >= 4) { $primary_exchange = "T"; } else { # only non-AMEX 1, 2, and 3-letter symbols left $primary_exchange = "N"; } return $primary_exchange; } sub Seconds2Time { my($seconds) = @_; my $hh = int($seconds / (60*60)); $seconds -= $hh*60*60; my $mm = int($seconds / 60); $seconds -= $mm*60; return sprintf("$hh:%02d:%02.4f", $mm, $seconds); } sub min { my $min = $_[0]; foreach my $val (@_) { $min = $val if $val < $min; } return $min; } sub max { my $max = $_[0]; foreach my $val (@_) { $max = $val if $val > $max; } return $max; }