#define _GNU_SOURCE #include #include #include #include #include #include "gsl/gsl_cblas.h" #include "gsl/gsl_cdf.h" #include "gsl/gsl_math.h" /* #include "Rmath.h" */ #define MATHLIB_STANDALONE #define MAX_QUOTES 2000 main(int argc, char **argv) { double nu_nyse, mu_nyse, eta3_nyse, eta4_nyse; double nu_amex, mu_amex, eta3_amex, eta4_amex; double nu_nasdaq, mu_nasdaq, eta3_nasdaq, eta4_nasdaq; char *qtfile_name; /* process command line arguments */ if (argc < 14) errx(1, "usage: %s nyseparams amexparams nasdaqparams streamquotefile\nwhere params are the quadruple nu mu eta3 eta4", argv[0]); nu_nyse = strtod(argv[1], NULL); mu_nyse = strtod(argv[2], NULL); eta3_nyse = strtod(argv[3], NULL); eta4_nyse = strtod(argv[4], NULL); nu_amex = strtod(argv[5], NULL); mu_amex = strtod(argv[6], NULL); eta3_amex = strtod(argv[7], NULL); eta4_amex = strtod(argv[8], NULL); nu_nasdaq = strtod(argv[9], NULL); mu_nasdaq = strtod(argv[10], NULL); eta3_nasdaq = strtod(argv[11], NULL); eta4_nasdaq = strtod(argv[12], NULL); qtfile_name = argv[13]; /* open streamlined quote file or give error and die */ FILE *qtfile; qtfile = fopen(qtfile_name, "r"); if (!qtfile) errx(1, "Can't open streamlined quote file '%s'! Dying.\n", qtfile_name); char *field_tmp = NULL; /* buffer for fields (or header line) from input file */ size_t len = 0; /* holds length of field read */ char symbol[10]; /* stock symbol */ char market; /* primary market: (N)YSE, (A)MEX, Nasda(Q) */ long tdate; /* trade date in CCYYMMDD format */ double ttime; /* trade time in seconds, including fractions */ int quote_count; /* # of quotes to be subsequently parsed */ int i; /* loop control variable to parse quote data */ double quote_times[MAX_QUOTES]; /* delays from quotes to trade */ double quotes[MAX_QUOTES]; /* quotes -- bids or asks */ double cdf_vals[MAX_QUOTES]; /* P(X < quote_times[]) for X ~ Gamma(nu, mu) */ double total_cdf; /* sum of cdf_vals; normalizing constant */ double ebid, eask; /* E(prevailing bid, ask) */ getline(&field_tmp, &len, qtfile); /* fgets(line_tmp, MAX_LINELEN, qtfile); */ /* skip header */ while (getdelim(&field_tmp, &len, ',', qtfile) != -1) { double nu; double mu; double eta3; double eta4; /* while (fgets(line_tmp, MAX_LINELEN, qtfile) != NULL) */ strcpy(symbol, field_tmp); symbol[strlen(field_tmp)-1] = '\0'; getdelim(&field_tmp, &len, ',', qtfile); market = field_tmp[0]; if (market == 'N') { nu = nu_nyse; mu = mu_nyse; eta3 = eta3_nyse; eta4 = eta4_nyse; } else if (market == 'T') { /* O _T_ C trades */ /* AMEX much rarer; Nasdaq (OTC) short-circuits more often */ nu = nu_nasdaq; mu = mu_nasdaq; eta3 = eta3_nasdaq; eta4 = eta4_nasdaq; } else { nu = nu_amex; mu = mu_amex; eta3 = eta3_amex; eta4 = eta4_amex; } getdelim(&field_tmp, &len, ',', qtfile); tdate = strtol(field_tmp, NULL, 10); getdelim(&field_tmp, &len, ',', qtfile); ttime = strtod(field_tmp, NULL); /* Read in bid delays from trade time and map to (gamma) */ /* CDF values. We ignore quotes past a certain age, so */ /* remember total CDF mass used for normalization. */ getdelim(&field_tmp, &len, ',', qtfile); quote_count = (int)strtol(field_tmp, NULL, 10); /* skip quote type; assume bid delays come first */ getdelim(&field_tmp, &len, ',', qtfile); for (i = 0; i < quote_count; i++) { /* cdf_vals[i] = pgamma(strtod(strtok(NULL, delims), NULL), nu, mu, 0, 0); */ getdelim(&field_tmp, &len, ',', qtfile); quote_times[i] = strtod(field_tmp, NULL); cdf_vals[i] = gsl_cdf_gamma_P(quote_times[i], nu, mu); } cdf_vals[quote_count] = 0.0; total_cdf = cdf_vals[0]; /* skip "BIDS" or "ASKS"; assume BIDS come first */ getdelim(&field_tmp, &len, ',', qtfile); /* Read in bids and weight by CDF area for period */ /* the quote existed. Normalize by total_cdf. */ ebid = 0.0; for (i = 0; i < quote_count; i++) { getdelim(&field_tmp, &len, ',', qtfile); quotes[i] = strtod(field_tmp, NULL); ebid += (cdf_vals[i] - cdf_vals[i+1])*quotes[i]/total_cdf; } /* Read in ask delays from trade time and map to (gamma) */ /* CDF values. We ignore quotes past a certain age, so */ /* remember total CDF mass used for normalization. */ getdelim(&field_tmp, &len, ',', qtfile); quote_count = (int)strtol(field_tmp, NULL, 10); /* skip quote type; assume ask delays come second */ getdelim(&field_tmp, &len, ',', qtfile); for (i = 0; i < quote_count; i++) { /* cdf_vals[i] = pgamma(strtod(strtok(NULL, delims), NULL), nu, mu, 0, 0); */ getdelim(&field_tmp, &len, ',', qtfile); quote_times[i] = strtod(field_tmp, NULL); cdf_vals[i] = gsl_cdf_gamma_P(quote_times[i], nu, mu); /* code below is for gamma-Edgeworth correction /* add o(n) correction -- unless we fail to satisfy */ /* the necessary regularity condition (nu > 4) */ if (nu > 4) cdf_vals[i] += eta3*gsl_pow_3(mu)/6* (-gsl_cdf_gamma_P(quote_times[i], nu, mu) + 3*gsl_cdf_gamma_P(quote_times[i], nu-1, mu) - 3*gsl_cdf_gamma_P(quote_times[i], nu-2, mu) + gsl_cdf_gamma_P(quote_times[i], nu-3, mu)); /* add o(n^{3/2}) corrections if we meet regularity conditions */ if (nu > 5) /* kurtosis correction */ cdf_vals[i] += eta4*gsl_pow_4(mu)/24* (gsl_cdf_gamma_P(quote_times[i], nu, mu) - 4*gsl_cdf_gamma_P(quote_times[i], nu-1, mu) + 6*gsl_cdf_gamma_P(quote_times[i], nu-2, mu) - 4*gsl_cdf_gamma_P(quote_times[i], nu-3, mu) + gsl_cdf_gamma_P(quote_times[i], nu-4, mu)); if (nu > 7) /* skew^2 correction */ cdf_vals[i] += eta3*eta3*gsl_pow_6(mu)/72* (gsl_cdf_gamma_P(quote_times[i], nu, mu) - 6*gsl_cdf_gamma_P(quote_times[i], nu-1, mu) + 15*gsl_cdf_gamma_P(quote_times[i], nu-2, mu) - 20*gsl_cdf_gamma_P(quote_times[i], nu-3, mu) + 15*gsl_cdf_gamma_P(quote_times[i], nu-4, mu) - 6*gsl_cdf_gamma_P(quote_times[i], nu-5, mu) + gsl_cdf_gamma_P(quote_times[i], nu-6, mu)); } cdf_vals[quote_count] = 0.0; total_cdf = cdf_vals[0]; /* skip "BIDS" or "ASKS"; assume ASKS come second */ getdelim(&field_tmp, &len, ',', qtfile); /* Read in asks and weight by CDF area for period */ /* the quote existed. Normalize by total_cdf. */ eask = 0.0; char *tok; for (i = 0; i < quote_count - 1; i++) { getdelim(&field_tmp, &len, ',', qtfile); quotes[i] = strtod(field_tmp, NULL); eask += (cdf_vals[i] - cdf_vals[i+1])*quotes[i]/total_cdf; } getdelim(&field_tmp, &len, '\n', qtfile); quotes[i] = strtod(field_tmp, NULL); eask += (cdf_vals[i] - cdf_vals[i+1])*quotes[i]/total_cdf; printf("%s,%d,%0.7f,%0.17f,%0.17f\n", symbol, tdate, ttime, ebid, eask); } fclose(qtfile); free(field_tmp); } /* tdate = strtol(strtok(NULL, delims), NULL, 10); */ /* ttime = strtod(strtok(NULL, delims), NULL); */ /* /\* Read in bid delays from trade time and map to (gamma) *\/ */ /* /\* CDF values. We ignore quotes past a certain age, so *\/ */ /* /\* remember total CDF mass used for normalization. *\/ */ /* quote_count = (int)strtol(strtok(NULL, delims), NULL, 10); */ /* strtok(NULL, delims); /\* skip quote type; assume bid delays come first *\/ */ /* for (i = 0; i < quote_count; i++) { */ /* /\* cdf_vals[i] = pgamma(strtod(strtok(NULL, delims), NULL), nu, mu, 0, 0); *\/ */ /* quote_times[i] = strtod(strtok(NULL, delims), NULL); */ /* cdf_vals[i] = gsl_cdf_gamma_P(quote_times[i], nu, mu); */ /* } */ /* cdf_vals[quote_count] = 0.0; */ /* total_cdf = cdf_vals[0]; */ /* strtok(NULL, delims); /\* skip "BIDS" or "ASKS"; assume BIDS come first *\/ */ /* /\* Read in bids and weight by CDF area for period *\/ */ /* /\* the quote existed. Normalize by total_cdf. *\/ */ /* ebid = 0.0; */ /* for (i = 0; i < quote_count; i++) { */ /* quotes[i] = strtod(strtok(NULL, delims), NULL); */ /* ebid += (cdf_vals[i] - cdf_vals[i+1])*quotes[i]/total_cdf; */ /* } */ /* /\* Read in ask delays from trade time and map to (gamma) *\/ */ /* /\* CDF values. We ignore quotes past a certain age, so *\/ */ /* /\* remember total CDF mass used for normalization. *\/ */ /* quote_count = (int)strtol(strtok(NULL, delims), NULL, 10); */ /* strtok(NULL, delims); /\* skip quote type; assume ask delays come second *\/ */ /* for (i = 0; i < quote_count; i++) { */ /* /\* cdf_vals[i] = pgamma(strtod(strtok(NULL, delims), NULL), nu, mu, 0, 0); *\/ */ /* quote_times[i] = strtod(strtok(NULL, delims), NULL); */ /* cdf_vals[i] = gsl_cdf_gamma_P(quote_times[i], nu, mu); */ /* } */ /* cdf_vals[quote_count] = 0.0; */ /* total_cdf = cdf_vals[0]; */ /* strtok(NULL, delims); /\* skip "BIDS" or "ASKS"; assume ASKS come second *\/ */ /* /\* Read in asks and weight by CDF area for period *\/ */ /* /\* the quote existed. Normalize by total_cdf. *\/ */ /* eask = 0.0; */ /* char *tok; */ /* for (i = 0; i < quote_count; i++) { */ /* tok = strtok(NULL, delims); */ /* quotes[i] = strtod(tok, NULL); */ /* ebid += (cdf_vals[i] - cdf_vals[i+1])*quotes[i]/total_cdf; */ /* } */