R: Learning by Doing


R by example.
For a quick start to the R statistical computing program, go to R...


Used Datasets

E.g., Media:Ozone.xls

R Code

As file:

Embedded:

##### Data Manipulation

## First Steps

# Load data frame
mydata <- read.table("http://j-node.homeip.net/tech_wiki/images/7/77/Ozone.txt")
series <- scan("http://j-node.homeip.net/tech_wiki/images/2/25/Eur_usd-small.txt")
myseries <- as.ts(series)


# Look at data
mydata
summary(mydata)
str(mydata)
coef(mydata)   # NULL for a data frame; coef() applies to fitted models (see lm() below)
names(mydata)


# Visualize
plot(mydata)
pairs(mydata, pch='.', gap=0)


# Objects
ls()


# Extracting elements
mydata$day

mydata[30,2]
mydata[30:35,2:3]
mydata[30,]
mydata[,2]
mydata[,'vdht']
mydata[1:30,-1]

# New Columns
mydata[,'lvdh'] <- log(mydata[,'vdht'])


## Creating Objects

#Concatenating Strings

str <- 'Fractional Brownian Motion'
paste(c(str, " PDF"), collapse="")

#Vectors
cat("name1 name2 name3", "2 3 5 7", "11 13 17", "111 222 333", file="example.dat", sep="\n")

vec <- numeric(10)

vec <- 1:5
vec2 <- c(1,3,2)
vec <- c(vec2, 4,5,6)
vec <- c("A","B","C")
vec <- mydata[,2]
vec <- seq(0, 7, by=1.2)
vec <- seq(-1, 1, length=10)
vec <- rep(c(1,2,3), 5)
vec <- rep(c(1,2,3), length=8)
vec <- (1:5)>3

vec <- seq(0, 7, by=1.2)
vec[c(1,3,6)]
vec[-2]
vec[-c(1,3,6)]

which(vec == 2.4)
which.max(vec)
match(6.0, vec)   # position of the value 6.0 in vec


# Matrices
x <- 1:4
y <- 5:8
mat <- cbind(x, y)
mat <- rbind(x, y, x+y)
mat <- diag(rep(1,10))

mat <- as.matrix(mydata)

mat1 <- matrix(2, nrow=2, ncol=3)
mat2 <- matrix(3, nrow=3, ncol=4)

mat1 %*% mat2
t(mat1)

mat1[,3] <- log(mat1[,1])
mat1[2,] <- log(mat1[1,])
mat1[mat1>1]
mat1[mat1>1 & mat1<3]

(2:5) ^ c(2, 3, 1, 0)


# Checking
is.matrix(mydata)
is.data.frame(mydata)
is.ts(mydata)
mode(mydata)
class(mydata)



##### Plotting

# Matrix
matplot(mat)

# Special
par(mfrow=c(2,3))
par(new=TRUE)

# Plot
plot(x, y, xlab = 'x-axis', ylab = 'y-axis', main = 'Title', cex=0.6, col="gray", ylim=c(1, 3))

# Additional Features
B <- 1.0
err = 0.1
y <- 1
x1 <- -3
x2 <- -2
x3 <- -1
x4 <- 0
txt <- c("B:", B, " +/- ", err)
text(c(x1, x2, x3, x4), y, txt)

legend(0, 0, legend=(c("A", "B", "C")), lty=c(1,3,5), col=c(1,2,3), cex=1)

rug(x)

abline(0.6,0.97)

x <- seq(-4,4,by=0.5)
plot(x,dnorm(x,mean=0,sd=0.9))
lines(x,dnorm(x ,mean=0,sd=0.9))

# Handling Plots
X11()

pdf('name.pdf')
#postscript('name.ps')

dev.off()



##### Defining Functions

myFct <- function(one, two, three) {
	f.max <- max(one, two, three)
	f.vec <- c(one, two, three)
	c(max=f.max, vec=f.vec)
}
val <- myFct(1,2,3)
val[1]
val['max']

myFct <- function(one, two, three) {
	return(sum(one, two, three))
}

numF <- function(x) x + 4 * cos(7*x)



##### Simple Functions

vec <- rep(c(1,2,3), length=8)

sqrt(vec)
max(vec)
sum(vec)
mean(vec)
sd(vec)
var(vec)
range(vec)

apply(mydata, 1, mean)
apply(mydata, 2, mean)



##### Advanced Functions

# Histogram
hist(myseries, breaks=500)

# Density (PDF)
plot(density(myseries))
plot(density(myseries, bw=0.00000001, kernel = "epanechnikov", n=400,from =-.1, to = .1, adjust = 1, cut = 1))

quantile(myseries) 

# CDF
plot(ecdf(myseries))

# Correlation
acf(myseries)

series2 <- scan("http://j-node.homeip.net/tech_wiki/images/f/fa/Chf_jpy-small.txt")
ccf(myseries, series2)


## Linear Regression: Scaling Laws
See more below...

dat <- read.table("http://j-node.homeip.net/tech_wiki/images/f/f3/Eur_usd_lb.txt")
ldat <- log(dat)

# Select interval
Min <- -3.75
Max <- 1
X <- ldat$V1[ldat$V1 >= Min & ldat$V1 <= Max]
Y <- ldat$V2[ldat$V1 >= Min & ldat$V1 <= Max]

# Compute and plot
plot(X, Y, xlab = 'log(delta x) [%]', ylab = 'log(delta x) [%]', main = 'EUR_USD Lookback')
result <- lm(Y~X)

coefficients(result)
summary(result)
result$coef
coef(result)
result$resid
resid(result)
summary(result)$coefficients[1,1]

abline(result$coef)

plot(result)


# Error propagation
A <- summary(result)$coefficients[1,1]
dA <- summary(result)$coefficients[1,2]
B <- summary(result)$coefficients[2,1]
dB <- summary(result)$coefficients[2,2]
C <- exp(-A/B)
C.err <- sqrt((-C/B*dA)^2+(A/B^2*C*dB)^2)
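# Note on the fit: the model is log(Y) = A + B*log(X), i.e. Y = (X/C)^B with
# C = exp(-A/B). C.err above follows from first-order (Gaussian) error propagation:
# dC/dA = -C/B and dC/dB = A*C/B^2, hence C.err = sqrt((C/B*dA)^2 + (A/B^2*C*dB)^2).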

# Formatting the output
Etxt <- c("E =", B, "+/-", dB)
Ctxt <- c("C =", C, "+/-", C.err)
xmin <- min(X)
ymax <- max(Y)
xrange <- max(X) - min(X)
yrange <- max(Y) - min(Y)
xmin <- xmin - xrange/xmin*0.1
text(c(xmin, xmin + xrange*0.18, xmin + xrange*0.36, xmin + xrange*0.55), ymax, Etxt)
text(c(xmin, xmin + xrange*0.21, xmin + xrange*0.42, xmin + xrange*0.62), ymax-yrange*0.07, Ctxt)


## More Regression
plot(mydata)

# Outlier
out <- which.max(mydata[,'wdsp'])
mydata <- mydata[-out,]

# Transform Data
newdata <- subset(transform(mydata,logupo3=log(upo3)),select=-upo3)

# Linear
lin <- lm(logupo3 ~ ., data=newdata)

# Nonparametric: Y = m(X) + e
# Local Polynomial (loess)
lp <- loess(Y ~ X)
# Smoothing Splines => Degrees of Freedom
df <- lp$trace.hat
ss <- smooth.spline(X, Y,df=df)
# (Nadaraya-Watson) Kernel Regression => Bandwith
kr <- ksmooth(X, Y, bandwidth=0.1)
# Check
plot(X, Y, main="Nonparametric Regression", cex=0.6, col="gray")
lines(kr$x,kr$y, col=1)
lines(X, predict(lp, x=X),lty=3, col=2, cex=0.5)
lines(X, predict(ss, x=X)$y, lty=5, col=3)
legend(0, 0, legend=(c("Kernel", "Local", "Splines")), lty=c(1,3,5), col=c(1,2,3), cex=1)
rug(X) 

# Flexible Regression: Additive Models
library(mgcv)
genaddmod <- gam(logupo3 ~ s(vdht)+s(wdsp)+s(hmdt)+s(sbtp)+s(ibht)+s(dgpg)+s(ibtp)+s(vsty)+s(day), data=newdata)
plot(genaddmod)


## Evaluation

plot(fitted(lin), resid(lin))
qqnorm(resid(lin))
qqline(lin$res)

plot(lin)

plot(predict(lp), Y)
plot(predict(ss)$y, Y)



##### Misc

example(lm)
?lm

set.seed(33)

source('./tmp.R')
cat("summary(result)", summary(result)$coef)


# Loops
for (i in 1:10) {
	# ...
}

myStr <- c('a\n', 'bb\n', 'ccc\n')
for (str in myStr) {
cat(str)
}

# Assignment Example
inc <- matrix(runif(100), nrow=50, ncol=2,byrow=FALSE)
new <- 1
for (i in 2:length(inc[,1])) {
    new[i] <- new[i-1] + inc[i,1]
}

search()
install.packages("earth")
library("earth")

q()


### MySQL
# install.packages("RMySQL")

# Execute MySQL for an array of strings
library(RMySQL)
drv <- dbDriver("MySQL")
con <- dbConnect(drv, dbname="db", user="usr", password="pwd", host="host")
ccy <- c('AUD_JPY', 'USD_JPY')
sink("ccy.txt")
for (instr in ccy) {
	dbListTables(con)
	qryStr <- paste("SELECT COUNT(*) FROM ", instr, "  WHERE time >= '2003-01-01 00:00:00' and time <= '2008-01-01 00:00:00' ORDER BY time;", collapse="")
	res <- dbSendQuery(con, qryStr)
	data <- fetch(res, n = -1)
	cat( instr, data[1,1], '\n')
	dbClearResult(res)
}
sink()
dbDisconnect(con)


## Extensions:
# Java: http://swik.net/sjava, http://rosuda.org/JGR/
# Web-GUI: http://www.rpad.org/Rpad/



##### On and on...

# round
# sample
# solve
# step
# class
# sapply
# earth; library(earth); a newer MARS implementation
# gam; library(mgcv)
# rbinom, rpois, rhyper
# runif, rnorm, rlnorm, rt, rF, rchisq, rgamma
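# A few quick illustrations of the functions listed above (minimal sketches):
round(pi, 2)                          # 3.14
sample(1:10, 3)                       # three values drawn without replacement
solve(matrix(c(2, 1, 1, 3), 2, 2))    # inverse of a 2x2 matrix
sapply(1:5, function(i) i^2)          # apply a function over a vector
rnorm(5)                              # five standard normal variates
runif(5)                              # five uniform variates on [0, 1]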

Focus: Time Series Analysis

Resources

R Code

Simple Gaussian random walk:

n <- 1000000
dx <- rnorm(n, 0, pi)
x <- rep(NA, n+1)
x[1] <- 11000

for (i in 1:n) {
x[i+1] <- x[i]+dx[i]
}

lx <- log(x)
dlx <- diff(lx)

Some timeDate stuff (compute distribution of deltas and extract mean and variance as double values):

t <- timeDate(data[,2])
dt <- diff.timeDate(t)
dsty <- density(dt)
sig <- sd(dt)
mu <- mean(dt)
sig <- structure(sig,class="double")[1]
mu <- structure(mu,class="double")[1]

Load a time series from a MySQL table

+----+---------------------+---------+---------+
| Id | time                | bid     | ask     |
+----+---------------------+---------+---------+
|  1 | 2004-06-30 12:09:02 | 1.21532 |  1.2155 |
|  2 | 2004-06-30 12:09:06 | 1.21534 | 1.21552 |
|  3 | 2004-06-30 12:09:09 | 1.21524 | 1.21542 |
+----+---------------------+---------+---------+

and plot mid prices and deltas (using diff()):

library(fCalendar)
library(RMySQL)

drv <- dbDriver("MySQL")
rdat <- dbConnect(drv, dbname="db", user="usr", password="pwd", host="host")

res <- dbSendQuery(rdat, "SELECT * FROM table where time >= '2004-08-02 00:00:00' order by Id asc LIMIT 300000;")
ts <- fetch(res, n = -1)

x <- timeDate(ts[,2])
y <- (ts[,3] + ts[,4]) / 2


par(mfrow=c(2,1))

plot.timeDate(x,y, type='l',xlab = 'Time', ylab = 'Price', main = 'Mid Prices')
plot.timeDate(x[-1],diff(y), type='l', xlab = 'Time', ylab = 'Price Change', main = 'Deltas')
 

Plot stuff:

plot(density(diff(log(y))), xlab = 'Price Move', ylab = 'Probability', main = 'PDF Log Returns')
plot(ecdf(diff(log(y))), xlab = 'Price Move', ylab = 'Frequency', main = 'CDF Log Returns')
acf(diff(log(y)), main = 'Autocorrelation Log Returns')
pacf(diff(log(y)), main = 'Partial Autocorrelation Log Returns')

And using the ts object assigned above

library(tseries)
irts <- irts(as.POSIXct(ts[,2]), y)
weekday(irts)
is.weekend(irts)

Load a time series from the MySQL database, plot its dx and dt densities, and overlay them with a normal distribution with the same mean and variance:

### Empirical values of (theoretical) Gaussian random walk
library(fCalendar)

label <- 'name'

## Load DB
library(RMySQL)
drv <- dbDriver("MySQL")
con <- dbConnect(drv, dbname="db", user="usr", password="pwd", host="host")

postscript(paste(label, '.ps', sep = ""))
par(mfrow=c(1,2))

qry <- paste('SELECT * FROM ', paste(label, ';', sep = ""), sep = "")
res <- dbSendQuery(con, qry)
data <- fetch(res, n = -1)

# Deltas
x <- (data[,3]+data[,4])/2
lx <- log(x)
llx <- (log(data[,3])+log(data[,4]))/2
t <- timeDate(data[,2])

dt <- diff.timeDate(t)
#which(dt <= 0)
#which(dt > 500)
dx <- diff(x)
dlx <- diff(lx)
dllx <- diff(llx)

dsty <- density(dt)
sigma <- sd(dt)
mu <- mean(dt)
plot(density(rnorm(1000000, mu, sigma)), xlim=c(min(dsty$x), max(dsty$x)), ylim=c(min(dsty$y), max(dsty$y)), type='p', col='red', main=label, xlab='dt')
par(new=TRUE)
plot(dsty, xlim=c(min(dsty$x), max(dsty$x)), ylim=c(min(dsty$y), max(dsty$y)), main=label, xlab='dt', lwd=2)
text(min(dsty$x) + (max(dsty$x) - min(dsty$x)) / 8, max(dsty$y) - (max(dsty$y) - min(dsty$y)) / 10, mu)
text(min(dsty$x) + (max(dsty$x) - min(dsty$x)) / 8, max(dsty$y) - (max(dsty$y) - min(dsty$y)) / 25, sigma)

dsty <- density(dx)
sig <- sd(dx)
mu <- mean(dx)
plot(density(rnorm(1000000, mu, sig)), xlim=c(min(dsty$x), max(dsty$x)), ylim=c(min(dsty$y), max(dsty$y)), type='p', col='red', main=label, xlab='dx')
par(new=TRUE)
plot(dsty, xlim=c(min(dsty$x), max(dsty$x)), ylim=c(min(dsty$y), max(dsty$y)), main=label, xlab='dx', lwd=2)
text(min(dsty$x) + (max(dsty$x) - min(dsty$x)) / 6, max(dsty$y) - (max(dsty$y) - min(dsty$y)) / 10, mu)
text(min(dsty$x) + (max(dsty$x) - min(dsty$x)) / 6, max(dsty$y) - (max(dsty$y) - min(dsty$y)) / 25, sig)

dev.off()

# Write to file
modelSig <- 1/6769.9
sink(paste(label, '.stats', sep = ""))
cat('sigma_dx: ', abs(sig-modelSig)/modelSig *100)
sink()

Linear regression of data and inset plots of distributions:

library(gridBase)
library(Hmisc)

postscript("GRW_GRW_N_TCKS.ps", horizontal=T, width=300, height=300)
#x11()
#par(mar=c(5,5,4,2))
#par(mfrow=c(2,4))


# Get data
dat <- read.table("ts.dat") 

# Approx. percentage
dat$V1 <- dat$V1*100
ldat <- log(dat)

# Select
Min <- log(0.02)
Max <- max(ldat$V1)
X <- ldat$V1[ldat$V1 >= Min & ldat$V1 <= Max]
Y <- ldat$V2[ldat$V1 >= Min & ldat$V1 <= Max]

# Compute
res <- lm(Y~X)
# Error propagation
A <- summary(res)$coefficients[1,1]
dA <- summary(res)$coefficients[1,2]
B <- summary(res)$coefficients[2,1]
dB <- summary(res)$coefficients[2,2]
C <- exp(-A/B)
C.err <- sqrt((-C/B*dA)^2+(A/B^2*C*dB)^2)

slEmpRSx <- function(t) (t/C)^B

# Plot
plot(dat,  xlim=c(exp(min(X)), exp(max(X))), ylim=c(exp(min(Y)), exp(max(Y))), xlab = 'XLAB', ylab = 'YLAB', main = 'TITLE', cex.axis=1.5, log='xy')
res <- lm(Y~X)

#grid(col='black')
#minor.tick(nx=10, ny=10, tick.ratio=0.6) 

par(new=TRUE)
plot(slEmpRSx, exp(min(X)), exp(max(X)), ylim=c(exp(min(Y)), exp(max(Y))), log='xy', col='red', cex.axis=2, lwd=2, yaxt="n", xaxt="n", xlab = 'XLAB', ylab = 'YLAB', main = 'TITLE')

arrows(0.08,190,0.1,slEmpRSx(0.1),angle=15,code=2,lwd=2)
arrows(2,200,3,slEmpRSx(3),angle=15,code=2,lwd=2)

# Inset Plot
vp <- baseViewports()
pushViewport(vp$inner,vp$figure,vp$plot)

pushViewport(viewport(x=0.1,y=0.95,width=.38,height=.38,just=c("left","top")))

par(plt=gridPLT(),new=T)

grid.rect(gp=gpar(lwd=1,col="black"))

ccy1 <- read.table("ts.obs")
dsty <- density(ccy1[,1])
maxX <- max(dsty$x)
maxY <- max(dsty$y)
inc <- (maxY -min(dsty$y))/15
plot(dsty, xlab = '', ylab = '', main = '')
text(maxX,maxY-inc*0.6,'ITITLE', 1)
text(maxX,maxY-2*inc,'I1', 1)
rug(ccy1$V1)

popViewport(4)

# Inset Plot
#vp <- baseViewports()
pushViewport(vp$inner,vp$figure,vp$plot)

pushViewport(viewport(x=0.95,y=0.1,width=.38,height=.38,just=c("right","bottom")))

par(plt=gridPLT(),new=T)

grid.rect(gp=gpar(lwd=1,col="black"))

ccy2 <- read.table("ts.obs")
dsty <- density(ccy2[,1])
maxX <- max(dsty$x)
maxY <- max(dsty$y)
inc <- (maxY -min(dsty$y))/15
plot(dsty, xlab = '', ylab = '', main = '')
text(maxX,maxY-inc*0.6,'ITITLE', 1)
text(maxX,maxY-2*inc,'I2', 1)
rug(ccy2$V1)

popViewport(4)

# Formatting the output
Etxt <- c("E =", B, "+/-", dB)
Ctxt <- c("C =", C, "+/-", C.err)

#sink(file="MAIN.para", append=TRUE)
cat("NAME", "\t", Etxt, "\t", Ctxt, "\t", "#data ", length(X), "\trange ", exp(Min), " - " , exp(Max), "\n")
#sink()

dev.off()

R and Bash

Combining R with Bash, see Script Mashup.
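A minimal sketch of the idea from the R side, using system() to run a shell command and capture its output (the ls call is just an example):

# run a shell command and capture its output as a character vector
files <- system("ls -1 *.txt", intern=TRUE)
# the reverse direction works via Rscript, e.g. from bash:
#   Rscript -e 'cat(mean(scan("numbers.txt")), "\n")'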

On and on...

Conditional TS Analysis

postscript("Cond-DC-EUR_USD-0.5.ps")
par(mar=c(5,5,4,2))
par(mfrow=c(2,4))


### Data

ccy <- read.table("EUR_USD-DX-OS-LB-2002-12-01_00:00:00-2007-12-01_00:00:00-0.500.obs")


### Calc 1

lab <- 'EUR_USD'
xlab <- '0.5% OS'

delta <- 1
ylab <- '1st following OS [%]'


lgth <- length(ccy[,1])-delta
xy <-  matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
	xy[i,1] <- ccy[i, 1]
	xy[i,2] <- ccy[i+delta, 1]
}

plt <- xy[order(xy[,1]),]

plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')




### Calc 2

delta <- 2
ylab <- '2nd following OS [%]'

lgth <- length(ccy[,1])-delta
xy <-  matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
	xy[i,1] <- ccy[i, 1]
	xy[i,2] <- ccy[i+delta, 1]
}

plt <- xy[order(xy[,1]),]

plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')



### Calc 3

delta <- 3
ylab <- '3rd following OS [%]'

lgth <- length(ccy[,1])-delta
xy <-  matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
	xy[i,1] <- ccy[i, 1]
	xy[i,2] <- ccy[i+delta, 1]
}

plt <- xy[order(xy[,1]),]

plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')



### Calc 4

delta <- 4
ylab <- '4th following OS [%]'

lgth <- length(ccy[,1])-delta
xy <-  matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
	xy[i,1] <- ccy[i, 1]
	xy[i,2] <- ccy[i+delta, 1]
}

plt <- xy[order(xy[,1]),]

plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')


### Calc 5

vol <- 4
ylab <- 'Sum 4 OS [%]'


lgth <- length(ccy[,1])-vol
xy <-  matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
	xy[i,1] <- ccy[i, 1]
	for (j in 1:vol) {
		xy[i,2] <- xy[i,2] + ccy[i+j, 1]
	}
}

plt <- xy[order(xy[,1]),] 

plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')


### Calc 6

vol <- 10
ylab <- 'Sum 10 OS [%]'


lgth <- length(ccy[,1])-vol
xy <-  matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
	xy[i,1] <- ccy[i, 1]
	for (j in 1:vol) {
		xy[i,2] <- xy[i,2] + ccy[i+j, 1]
	}
}

plt <- xy[order(xy[,1]),] 

plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')


### Calc 7

vol <- 20
ylab <- 'Sum 20 OS [%]'


lgth <- length(ccy[,1])-vol
xy <-  matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
	xy[i,1] <- ccy[i, 1]
	for (j in 1:vol) {
		xy[i,2] <- xy[i,2] + ccy[i+j, 1]
	}
}

plt <- xy[order(xy[,1]),] 

plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')


### Calc 8

vol <- 80
ylab <- 'Sum 80 OS [%]'


lgth <- length(ccy[,1])-vol
xy <-  matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
	xy[i,1] <- ccy[i, 1]
	for (j in 1:vol) {
		xy[i,2] <- xy[i,2] + ccy[i+j, 1]
	}
}

plt <- xy[order(xy[,1]),] 

plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')

dev.off()

Java, Bash and R

Script Mashup

Nice Figures

2D Plot with Two Y-Axes and X-Axis Labeled by Dates
library(fCalendar)

### Variables
myData <- 'EUR_CHF_2002-01-01-00:00:00_2008-11-02-00:00:00_6MONTH_0.25.r'

instr <- substr(myData,1,7)
from <- substr(myData,9,18)
to <- substr(myData,29,38)
inc <- substr(myData,56,59)
hor <- substr(myData,49,54)

main <- paste(instr, ' ', hor, ' ', inc)

### Load data
load(myData)
lgth <-  length(inData) 
X <- rep(0.0, lgth) 
Y <- rep(0.0, lgth) 
Z <- rep(0.0, lgth)
T <- seq(as.Date('1970-01-01'), by='day', length=lgth)
for (i in 1:lgth) {
        X[i] <- i;
        Y[i] <- inData[[i]]$value$B;
        Z[i] <- inData[[i]]$value$C;
        T[i] <- timeDate(inData[[i]]$beanEnd)
} 

### Plot
par(mar=c(4.5,4.5,3,5)) 
#par(mfrow=c(1,1)) 

#plot(X, Y, xlim=c(min(X), max(X)), ylim=c(min(Y), max(Y)), yaxt='n', xlab = 't', ylab = 'C', main='', type='p', pch=20)
plot(T, Y, ylim=c(min(Y), max(Y)), xaxt='n', xlab = 't (end date)', ylab = 'E', main='', type='p', pch=20)

# x-axis
#sequ <- T[seq(1, by=20, length=lgth/20)]
firstend <- seq(as.Date(from), by='6 month', length=2)[2]

dt <- difftime(to, from, units='days')
yrs <- as.double(dt)/365
iYrs <- as.integer(yrs)

if(yrs >= 3) {
  	sequ <- seq(as.Date(from),as.Date(to), by='year')
} else if (yrs >= 1) {
	sequ <- seq(as.Date(from),as.Date(to), by='3 month')
} else if (yrs >= 0.7) {
	sequ <- seq(as.Date(from),as.Date(to), by='2 month')
} else if (yrs >= 0.2) {
	sequ <- seq(as.Date(from),as.Date(to), by='2 day')
} else {
	sequ <- seq(as.Date(from),as.Date(to), by='1 day')
}
#sequ <- seq(as.Date(from), by='year', length=iYrs)


myX <- c(firstend, as.Date(to))
trsf <- myX[myX > T[1]]
axis.Date(1, at = trsf, labels = format(trsf, format='%m-%d'))
real <- sequ[sequ > T[1]] 
axis.Date(1, at = real, labels = format(real, format='%m-%d')) 
axis.Date(1, at = real, col='black', padj=1.5)
 
# Comment: note  problems with axis.Date

par(new=TRUE) 

plot(X, Y, xlim=c(min(X), max(X)), ylim=c(min(Y), max(Y)), xaxt='n', yaxt='n', xlab = '', ylab = '', main=main, type='l') 
par(new=TRUE)
plot(X, Z, xlim=c(min(X), max(X)), ylim=c(min(Z), max(Z)), xaxt='n', yaxt='n', xlab = '', ylab = '', main='', pch='*')
par(new=TRUE)
plot(X, Z, xlim=c(min(X), max(X)), ylim=c(min(Z), max(Z)), xaxt='n', yaxt='n', xlab = '', ylab = '', main='', type='l', lty=3) 
axis(4)

mtext('C', side=4, line=4)

3D Scatterplot

The scatterplot3d library is available on cran.r-project.org.

It needs R version 2.7.0 or later; see the R page for updating.
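To install or update the package from CRAN, the usual commands apply (a minimal sketch):

install.packages("scatterplot3d")
library(scatterplot3d)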

For each t (y-axis) value, the data points are plotted in a for loop. I couldn't figure out how to rotate the view around the z-axis.

library(scatterplot3d)

myData <- '/home/jbg/work/svn/j4/EUR_CHF_2002-01-01-00:00:00_2002-08-02-00:00:00_6MONTH_0.50.r'

load(myData)

fname <- basename(myData)   # strip the directory so the substr offsets match the file name
instr <- substr(fname,1,7)
from <- substr(fname,9,18)
inc <- substr(fname,56,59)
hor <- substr(fname,49,54)
main <- paste(instr, ' ', hor, ' ', inc)

size <- length(inData)

xmax <- -1e10
xmin <- 1e10
ymax <- -1e10
ymin <-  1e10

for (j in 1:size) {
xmax <- max(xmax,  max(inData[[j]]$value$X))
ymax <- max(ymax,  max(inData[[j]]$value$Y))
xmin <- min(xmin,  min(inData[[j]]$value$X))
ymin <- min(ymin,  min(inData[[j]]$value$Y))
}

for (j in 1:size) {
lgth <-  length(inData[[j]]$value$X) 

X <- inData[[j]]$value$X
Y <- inData[[j]]$value$Y

ind <- which(Y==0) 
if(length(ind) > 0) {
 X <- X[-ind];
 Y <- Y[-ind];
} 

X <- log( X ) 
Y <- log( Y ) 
# Linear model 
res <- lm(Y~X) 
# Estimate parameters 
A <- summary(res)$coefficients[1,1] 
dA <- summary(res)$coefficients[1,2] 
B <- summary(res)$coefficients[2,1] 
dB <- summary(res)$coefficients[2,2] 
C <- exp(-A/B) 
C.err <- sqrt((-C/B*dA)^2+(A/B^2*C*dB)^2) 
# Define function 
slEmpRSx <- function(t) (t/C)^B 

# Plot
plo <- scatterplot3d(X,rep(j, lgth), Y, xlim=log(c(xmin,xmax)), zlim=log(c(ymin,ymax)),  ylim=c(1,size), pch='.', box = FALSE, angle = 45, scale.y=1, grid=F, main=main, xlab='DX', zlab='#DC', ylab='t')

# Regression line
y <- slEmpRSx(exp(X))
#plo$points3d(X,rep(j, lgth), log(y), type='l')

par(new=TRUE)
}

3D Contour Plot

Using the empirical data from the scatterplot above to estimate scaling laws.

myData <- 'EUR_CHF_2002-01-01-00:00:00_2008-11-02-00:00:00_6MONTH_0.25.r'

load(myData)

instr <- substr(myData,1,7) 
from <- substr(myData,9,18)
inc <- substr(myData,56,59) 
hor <- substr(myData,49,54)
main <- paste(instr, ' ', hor, ' ', inc)

size <- length(inData)
lgth <- length(inData[[1]]$value$X)

xmax <- -1e10
xmin <- 1e10
ymax <- -1e10
ymin <-  1e10

newX <- seq(1,size,by=3)
mat <- matrix(0, nrow=lgth, ncol=length(newX))

#for (j in 1:size) {
for(j in newX) {
	xmax <- max(xmax,  max(inData[[j]]$value$X))
	ymax <- max(ymax,  max(inData[[j]]$value$Y))
	xmin <- min(xmin,  min(inData[[j]]$value$X))
	ymin <- min(ymin,  min(inData[[j]]$value$Y))
}

myInd <- 1
for (j in newX) {

X <- inData[[j]]$value$X
Y <- inData[[j]]$value$Y

ind <- which(Y==0) 
if(length(ind) > 0) {
 X <- X[-ind];
 Y <- Y[-ind];
}

lX <- log( X ) 
lY <- log( Y ) 

# Linear model 
res <- lm(lY~lX) 
# Estimate parameters 
A <- summary(res)$coefficients[1,1] 
dA <- summary(res)$coefficients[1,2] 
B <- summary(res)$coefficients[2,1] 
dB <- summary(res)$coefficients[2,2] 
C <- exp(-A/B) 
C.err <- sqrt((-C/B*dA)^2+(A/B^2*C*dB)^2)

# Define function 
slEmpRSxLog <- function(t) (t/C)^B 
slEmpRSx <- function(t) A+B*t 

#plot(X,Y, log='xy', xlim=c(min(X), max(X)), ylim=c(min(Y), max(Y))) # Real axis vals
#plot(lX,lY, xlim=c(min(lX), max(lX)), ylim=c(min(lY), max(lY))) # Log axis vals

## Real axis vals
#plot(slEmpRSxLog, log='xy', xlim=c(min(X), max(X)), ylim=c(min(Y), max(Y)))
#plot(slEmpRSxLog(X), log='xy')

## Log axis vals
#plot(slEmpRSx, xlim=c(min(lX), max(lX)), ylim=c(min(lY), max(lY)))
#plot(slEmpRSx(X))
#plot(slEmpRSx(lX), log='x')

# Save data
#sequ <- seq(min(X), max(X), by= (max(X) - min(X)) / lgth)
sequ <- seq(min(lX), max(lX), by= (max(lX) - min(lX)) / lgth)
sequ <- sequ[1:lgth]

#mat[,j] <- slEmpRSxLog(sequ)
mat[,myInd] <- slEmpRSx(sequ)
myInd <- myInd + 1
}

persp(sequ, seq(1,length(newX)), mat, phi=25, theta=55, xlab='DX', zlab='#DC', ylab='t', ticktype='detailed', main=main, r=10, shade=0.1, lphi=0, ltheta = -135)

Histogram with Inset

Thanks Alex...

library(lattice)
graphics.off()

times <- read.table("times.dat")

pdf("latency.pdf", onefile=F)

qs <- quantile(times[,1],probs=seq(0,1,0.01));
s  <- subset(times[,1],times[,1]>qs[96]);

plot.new()
h1 <- histogram(times[,1],xlab="latency [ms]",ylab="occurrences",main="",col="salmon",type="count")
h2 <- histogram(s,xlab="latency [ms]",ylab="occurrences",main="> 95th-centile",col="salmon",type="count")
print(h1,more=T)
print(h2,position = c(.4,.2,.9,.65))

m <- round(mean(times[,1])*100)/100
s <- round(sd(times[,1])*100)/100
text(0.15,0.98,paste("mean = ",m,"  /   std = ",s),pos=4)
q90 <- round(qs[91]*10)/10;
q95 <- round(qs[96]*10)/10;
q99 <- round(qs[100]*10)/10;
text(0.15,0.90,paste("perc.: 90% <",q90,", 95% <",q95,", 99% <",q99),pos=4)

dev.off()

Bar Chart

Thanks Alex...

library(lattice)
graphics.off()

pdf("pie.pdf", onefile=F)

times <- read.table("times.dat")

lat  = times[,1];
exe  = times[,2];
wai  = times[,3];

res=sort(lat,index.return=T);
lat=res$x
exe=exe[res$ix];
wai=wai[res$ix];

n=length(lat);

nbSlices=20;
ds=n/nbSlices;

avl=seq(0,0,length=nbSlices)
ave=seq(0,0,length=nbSlices)
avw=seq(0,0,length=nbSlices)

for (i in seq(1,nbSlices)) {
 
   i0=floor(1+(i-1)*ds);
   i1=floor(i*ds);
   if (i == nbSlices) i1=n;

   avl[i]=mean(lat[i0:i1]);
   ave[i]=mean(exe[i0:i1]);
   avw[i]=mean(wai[i0:i1]);
}

y0=0;
y1=max(avl)
x=seq(1,nbSlices)*5-2.5
plot(x,avl,type="l",xlab="",ylab="",xlim=c(-2.5,97.5),ylim=c(y0,y1),lwd=2)
barplot(avl/avl*y1,width=5,add=T,space=0,col="salmon",axisnames=F)
barplot(ave/avl*y1,width=5,add=T,space=0,col="skyblue",axisnames=F)
par(new=T)
plot(x,avl,type="b",xlab="trades ascendingly sorted by latency time [%]",ylab="latency [ms]",xlim=c(-2.5,97.5),ylim=c(y0,y1),lwd=2)

dy=(y1-y0)/4;
axis(4,c(y0,y0+dy,y0+3*dy,y1),c(0,25,75,100))
mtext("times decomposition [%]",side=4)

legend(x=mean(x),y=y1/2,c("execution/latency","waiting/latency"),fill=c("skyblue","salmon"),bg="white",xjust=0.5,yjust=0.5)

dev.off()

2D Plot with Error Bars

Daily seasonality. Thanks Alex...

library(lattice)
graphics.off()

pdf("dailyLat.pdf", onefile=F)

times <- read.table("times.dat")

lat  = times[,1];
hour = times[,8];

sx  = seq(0,0,length=24)
sx2 = seq(0,0,length=24)
nh  = seq(0,0,length=24)

n=length(lat)

for (i in seq(1,n)) {
  sx[hour[i]+1]  = sx[hour[i]+1]+lat[i];
  sx2[hour[i]+1] = sx2[hour[i]+1]+(1.0*lat[i]*lat[i]);
  nh[hour[i]+1]  = nh[hour[i]+1] + 1;
}

av=sx/pmax(nh,1);
std=sqrt(sx2/pmax(nh,1)-av*av);

y0=min(av-std/2);
y1=max(av+std/2);

x <- seq(0,23)
plot(x,av,type="b",ylim=c(y0,y1),xlab="hours",ylab="latency [ms]",main="Daily seasonality")
arrows(x,av-std/2,x,av+std/2,length=.05,angle=90,code=3,col="gray")

dev.off()

Histogram with Daily Bins

Weekly seasonality. Thanks Alex...

library(lattice)
graphics.off()

pdf("weeklyLat.pdf", onefile=F)

times <- read.table("times.dat")

lat  = times[,1];
wday = times[,7];
hour = times[,8];

sx  = seq(0,0,length=168)
sx2 = seq(0,0,length=168)
nh  = seq(0,0,length=168)

n=length(lat)

for (i in seq(1,n)) {
  j=(wday[i]-1)*24+hour[i]+1;
  sx[j]  = sx[j]+lat[i];
  sx2[j] = sx2[j]+(1.0*lat[i]*lat[i]);
  nh[j]  = nh[j] + 1;
}

av=sx/pmax(nh,1);
std=sqrt(sx2/pmax(nh,1)-av*av);

y0=min(av-std/2);
y1=max(av+std/2);

x <- seq(0,167)
plot(x,av,type="l",ylim=c(y0,y1),xlab="hours",ylab="latency [ms]",main="Weekly seasonality")
#arrows(x,av-std/2,x,av+std/2,length=.05,angle=90,code=3,col="gray")

# Infos about the days

lines(c(0,0),c(y0,y1),col="gray")
text(12,y1,"Sun",col="gray",pos=1)

lines(c(24,24),c(y0,y1),col="gray")
text(36,y1,"Mon",col="gray",pos=1)

lines(c(48,48),c(y0,y1),col="gray")
text(60,y1,"Tue",col="gray",pos=1)

lines(c(72,72),c(y0,y1),col="gray")
text(84,y1,"Wed",col="gray",pos=1)

lines(c(96,96),c(y0,y1),col="gray")
text(108,y1,"Thu",col="gray",pos=1)

lines(c(120,120),c(y0,y1),col="gray")
text(132,y1,"Fri",col="gray",pos=1)

lines(c(144,144),c(y0,y1),col="gray")
text(156,y1,"Sat",col="gray",pos=1)

lines(c(168,168),c(y0,y1),col="gray")

dev.off()

The times.dat file looks like

135 135 0 2008 10 9 5 11 47 25
135 135 0 2008 10 10 6 21 55 27
...
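Judging from how the scripts above index times.dat, column 1 is the latency, 2 the execution time and 3 the waiting time, while columns 4 to 10 look like year, month, day, weekday, hour, minute and second. A small sketch that names the columns when reading the file (the names are ours, not part of the file):

times <- read.table("times.dat")
names(times) <- c("lat", "exe", "wai", "year", "month", "day", "wday", "hour", "min", "sec")
head(times)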
HeatMap

To create a heat map from the values given in the file os_heatmap_EUR_USD_2010-04-15.rdat

-1.0;0.0; ... ;0.0;2010-04-16 06:00:12
-1.0;-1.0; ... ;0.0;2010-04-16 06:02:13
-1.0;-1.0; ... ;0.0;2010-04-16 06:23:43
...

use

 
name <- 'os_heatmap_EUR_USD_2010-04-15.rdat'


Y <- c(0.2, 0.22103418361512955, 0.24428055163203402, 0.2699717615152007, 0.29836493952825416, 0.3297442541400258, 0.364423760078102, 0.40275054149409556, 0.44510818569849386, 0.4919206222313903, 0.5436563656918095, 0.6008332047892873, 0.6640233845473102, 0.7338593335238497, 0.811039993368936, 0.8963378140676141, 0.9906064848790244, 1.0947894783454415, 1.209929492882591, 1.337178888455856, 1.4778112197861324, 1.6332339825135327, 1.8050026998868274, 1.994836490962948, 2.2046352761283248, 2.4364987921407, 2.692747607000344, 2.9759463449745738, 3.288929354219418, 3.6348290738886213, 4.017107384637544, 4.439590256288339, 4.906506039421884, 5.422527784131593, 5.99282000947942)

# Params
start <- 1 # >0
number <- 3192
dir <- "/home/jbg/work/myEclipse/j-4.x/"

# Extract data

dat <- read.table(paste(c(dir,name), collapse=""),sep=";",stringsAsFactors=FALSE)
sz <- length(dat[1,])-1
size <- length(dat[,1])

stamp <- dat[,sz+1]
dat <- dat[,1:sz]
x <- as.matrix(dat)

snip <- t(x[start:(start+number),])


# Heat map
library('gplots')
require(graphics); require(grDevices)
#i <- max(abs(snip))
imin <- abs(min(dat))
imax <- max(dat)
i<-max(imax,imin)

mi = 0.0
mx = 0.9

sqUp=seq(mi,mx,mx/(i-1))
sqDo=seq(mx,mi,-mx/(i-1))

cc<-c(rgb(sqUp,sqUp,1.0),rgb(1,1,1),rgb(1.0,sqDo,sqDo))	

Y <- format(Y, digits = 3)
X <- character(number)

#X[1] <- as.character(start)
#X[number] <- as.character(start + number)
X[1] <- stamp[start]
X[number] <- stamp[start + number]
		
heatmap.2(snip, labRow=Y, labCol=X, symkey=F, symbreaks=T, dendrogram='none', revC=TRUE, trace='none', Rowv = NA, Colv = NA, scale="none",col = cc, main = name, xlab = "t", ylab= "DC", margins=c(7,5), keysize = 1.5)



HeatMap with Overlay

os_DCOSClock_heatmap_EUR_USD_2010-05-01_0.2-0.25-0.3-0.35-0.4-0.45-0.5-0.55.rdat:

0.1;-0.1;-0.1;-0.1;-0.1;-0.1;-0.1;-0.1;2010-05-02 20:59:43
0.1;0.1;-0.1;-0.1;-0.1;-0.1;-0.1;-0.1;2010-05-02 20:59:47
-0.1;0.1;-0.1;-0.1;-0.1;-0.1;-0.1;-0.1;2010-05-02 21:12:12
-0.1;-0.1;-0.1;-0.1;-0.1;-0.1;-0.1;-0.1;2010-05-02 21:36:51
-1.0;-0.1;-0.1;-0.1;-0.1;-0.1;-0.1;-0.1;2010-05-03 00:08:25
...

os_TBCTH_heatmap_EUR_USD_2010-05-01_0.2-0.25-0.3-0.35-0.4-0.45-0.5-0.55.rdat:

-1.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;2010-05-02 20:59:43
0.0;-1.0;0.0;0.0;0.0;0.0;0.0;0.0;2010-05-02 20:59:47
0.0;-0.9;0.0;0.0;0.0;0.0;0.0;0.0;2010-05-02 20:59:47
1.0;0.0;0.0;0.0;0.0;0.0;0.0;0.0;2010-05-02 21:12:12
-0.9;0.0;0.0;0.0;0.0;0.0;0.0;0.0;2010-05-02 21:12:12
0.0;1.0;0.0;0.0;0.0;0.0;0.0;0.0;2010-05-02 21:36:51
...

dir <- "/home/jbg/work/"
name <- 'os_DCOSClock_heatmap_EUR_USD_2010-05-01_0.2-0.25-0.3-0.35-0.4-0.45-0.5-0.55.rdat'
trades <- 'os_TBCTH_heatmap_EUR_USD_2010-05-01_0.2-0.25-0.3-0.35-0.4-0.45-0.5-0.55.rdat'
Y <- c(0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55) # Thresholds
ttl <- 'EUR_USD'

Y <- format(Y, digits = 2)

# Note: the functions defined further down (main, loadData, drawHeatMap,
# drawTrades, getIndexFromStamp) must be sourced before running these lines
dat <- 0
loadData()
start <- 1 #>0
size <- length(dat[,1])

par(mar=c(5,5,3,5))
main()

pdf(paste(dir, name, ".pdf",sep=""),w=14,h=9)
par(mar=c(5,5,3,5))
main()
dev.off()


###############
main <- function() {
	loadData()

	# Draw
	drawHeatMap()
	for (i in c(1:sz)) {
		par(new=T)
		drawTrades(i)
	}
}

snip <- 0
sz <-0
tdat <- 0
number <- 0
stamp <- 0

############### Load data
loadData <- function() {
	# Extract heatmap data
	dat <<- read.table(paste(c(dir,name), collapse=""),sep=";",stringsAsFactors=FALSE)
	# Extract trade data
	tdat <<- read.table(paste(c(dir,trades), collapse=""),sep=";",stringsAsFactors=FALSE)

	# Vars
	sz <<- length(dat[1,])-1
	#start <- 1 # >0
	#size <- length(dat[,1])
	number <<- size-1;
	
	stamp <<- dat[,sz+1]
	dat <- dat[,1:sz]
	x <- as.matrix(dat)
	snip <<- x[start:(start+number),]
	
	#result <- list(snip=snip,sz=sz,tdat=tdat,stamp=stamp)
	#return(result)
}

############### Heatmap
drawHeatMap <- function() {
# Heat map
	library('gplots')
	require(graphics); require(grDevices)
	
	imin <- -8
	imax <- 8
	i<-max(imax,abs(imin))
	
	mi = 0.05
	mx = 0.95
	
	sqUp=seq(mi,mx,mx/(i))
	sqDo=rev(sqUp)	
	
	cc2<-c(rgb(sqUp,sqUp,1.0),'#EEEEFF','#FFEEEE',rgb(1.0,sqDo,sqDo)) # dcs give small values for os
	
	brk <- c(seq(imin,-1,1), -0.1, 0, 0.1, seq(1,imax,1))
	
	image(snip, col=cc2, breaks=brk, axes = FALSE)
}

############### Draw trades
drawTrades <- function(threshInd) {
	
	tstamp <- tdat[,sz+1]
	tx <- as.matrix(tdat[,threshInd])
	
	# Init
	longI <- which(tx==1)
	longvalsI <- tx[tx==1]
	longstampI <- tstamp[longI]
	longXI <- getIndexFromStamp(longstampI)
	
	shortI <- which(tx==-1)
	shortvalsI <- tx[tx==-1]
	shortstampI <- tstamp[shortI]
	shortXI <- getIndexFromStamp(shortstampI)
	
	# Close
	longC <- which(tx<1 & tx >0)
	longvalsC <- tx[tx<1 & tx >0]
	longstampC <- tstamp[longC]
	longXC <- getIndexFromStamp(longstampC)
	
	shortC <- which(tx>-1 & tx<0)
	shortvalsC <- tx[tx>-1 & tx<0]
	shortstampC <- tstamp[shortC]
	shortXC <- getIndexFromStamp(shortstampC)
	
	# Casc
	longCa <- which(tx>1)
	longvalsCa <- tx[tx>1]
	longstampCa <- tstamp[longCa]
	longXCa <- getIndexFromStamp(longstampCa)
	
	shortCa <- which(tx < -1)
	shortvalsCa <- tx[tx < -1]
	shortstampCa <- tstamp[shortCa]
	shortXCa <- getIndexFromStamp(shortstampCa)
	
	# Casc col
	llong <- log(longvalsCa)/log(2)
	lshort <- log(abs(shortvalsCa))/log(2)
	
	lmax <- max(llong)
	smax <- max(lshort)
	mil = 0.1
	mxl = 0.8
	mis = 0.3
	mxs = 0.8
	
	sqLo=seq(mil,mxl,mxl/(lmax))
	sqSh=rev(seq(mis,mxs,mxs/(smax)))
	
	#cclo<-c(rgb(1.0,sqLo,sqLo))#red
	#ccsh<-c(rgb(sqSh,sqSh,1.0))#blue
	cclo<-c(rgb(1.0,(sqLo),0.0)) #yellow
	ccsh<-c(rgb(0.0,rev(sqSh),1.0))
	
	cclo <- cclo[llong]
	ccsh <- ccsh[lshort]
	
	# Plot
	siz <- length(snip[,1])-1
	myoffset <- start -1 + 0.5 # middle of squares
	
	ylev <- seq(1,sz*2,2)
	
	plot(longXI, rep(ylev[threshInd]/(sz*2), length(longXI)), pch=24, col='red', xlim=c(myoffset, myoffset + siz), ylim=c(0,1), xaxs='i', yaxs='i', ylab='DC (%)', xlab='Events (#)', main=ttl, yaxt="n")
	par(new=T)
	plot(shortXI, rep(ylev[threshInd]/(sz*2), length(shortXI)), pch=25, col='blue', xlim=c(myoffset, myoffset + siz), ylim=c(0,1), xaxs='i', yaxs='i', xlab='', ylab='', yaxt="n")
	par(new=T)
	plot(longXC, rep(ylev[threshInd]/(sz*2), length(longXC)), pch=20, col='red', xlim=c(myoffset, myoffset + siz), ylim=c(0,1), xaxs='i', yaxs='i', xlab='', ylab='', yaxt="n")
	par(new=T)
	plot(shortXC, rep(ylev[threshInd]/(sz*2), length(shortXC)), pch=20, col='blue', xlim=c(myoffset, myoffset + siz), ylim=c(0,1), xaxs='i', yaxs='i', xlab='', ylab='', yaxt="n")
	par(new=T)
	plot(longXCa, rep(ylev[threshInd]/(sz*2), length(longXCa)), col=cclo, pch=2, xlim=c(myoffset, myoffset + siz), ylim=c(0,1), xaxs='i', yaxs='i', xlab='', ylab='', yaxt="n")
	par(new=T)
	plot(shortXCa, rep(ylev[threshInd]/(sz*2), length(shortXCa)), col=ccsh, pch=6, xlim=c(myoffset, myoffset + siz), ylim=c(0,1), xaxs='i', yaxs='i', xlab='', ylab='', yaxt="n")

	axis(2, at=ylev/(sz*2),labels=Y, las=2)
	X <- character(2)
	X[1] <- stamp[start]
	X[2] <- stamp[start + number]
	axis(1, at=c(start,start+number-1),labels=X, las=1)
	
}

############### Aux functions
getIndexFromStamp <- function(stampvals) {
	if (length(stampvals)) {
		val <- rep(0, length(stampvals)-1)
		cnt <- 1
		for (i in stampvals) {
			tmp <- which(i==stamp)
			for (j in tmp) {
				val[cnt] <- j
			}
			cnt <- cnt + 1
		}
		val
	}
}


Script Examples

More R scripts here...

Scaling Law Estimation

See also above...

File my.dat:

0.01 7947915.0
0.1 100000.0
1.0 280.0
2.0 220.0
5.0 25.0

R script:

 ### Data

dat <- read.table("my.dat")
lab <- 'LABEL'
xlabT <- 'XLAB'
ylabT <- 'YLAB'


### Estimate

# rescale here if needed, e.g. dat$V1 <- dat$V1*100 to convert to percent

ldat <- log(dat)

Min <- min(ldat$V1)
Max <- max(ldat$V1)

X <- ldat$V1[ldat$V1 >= Min & ldat$V1 <= Max]
Y <- ldat$V2[ldat$V1 >= Min & ldat$V1 <= Max]

res <- lm(Y~X)

# Error propagation
A <- summary(res)$coefficients[1,1]
dA <- summary(res)$coefficients[1,2]
B <- summary(res)$coefficients[2,1]
dB <- summary(res)$coefficients[2,2]
C <- exp(-A/B)
C.err <- sqrt((-C/B*dA)^2+(A/B^2*C*dB)^2)

slEmpRSx <- function(t) (t/C)^B

cat('Exp:', B)
cat(' +- ', dB)

cat('\nCst:', C)
cat(' +- ', C.err)
cat('\n')

### Plot

plot(exp(X),exp(Y), xlim=c(min(exp(X)), max(exp(X))), log='xy', ylim=c(min(exp(Y)), max(exp(Y))), cex=1.5, lwd=2.5,  col='blue', ylab = ylabT, xlab = xlabT, main='TITLE', type='l', lty=3, bg='white', yaxt='n', xaxt='n', cex.main=2, cex.lab=1.5)

par(new=TRUE)

plot(exp(X),exp(Y), xlim=c(min(exp(X)), max(exp(X))), log='xy', ylim=c(min(exp(Y)), max(exp(Y))), cex=1.5, lwd=1.5,  col='blue', ylab = ylabT, xlab = xlabT, main='TITLE', type='p', lty=0, bg='white', yaxt='n', xaxt='n', cex.main=2, cex.lab=1.5)

par(new=TRUE)

plot(exp(X),slEmpRSx(exp(X)), xlim=c(min(exp(X)), max(exp(X))), ylim=c(min(exp(Y)), max(exp(Y))), log='xy',  type='l', col='red', ylab = ylabT, xlab = xlabT)

Find Min/Max in Data File

You have a file with numbers and want to find the minimal or maximal value. Let's say the file is trade.list and the values are in the fourth column

awk '{print $4}' trade.list > my.list

Perhaps semicolons are stuck to the numbers, so you need to get rid of them

sed 's/;//g' my.list > real.list

Enter R. Read data

dat <- read.table('real.list', stringsAsFactors=F)

and make numeric

num <- as.numeric(dat[,])

and get rid of NaNs

res <- na.omit(num)

Voila:

max(res)
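The whole pipeline can also be done in R alone (a sketch, assuming whitespace-separated columns in trade.list); gsub() strips the semicolons before the numeric conversion:

dat <- read.table('trade.list', stringsAsFactors=F)
num <- as.numeric(gsub(";", "", dat[,4]))
res <- na.omit(num)
range(res)   # minimum and maximum in one call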


Price Curve, Trades and Averages


Source the following code in R:

library('fCalendar')

### Tools
dat <- function(t) as.POSIXct(t, format="%Y-%m-%d %H:%M:%S", tz="GMT",origin="1970-01-01 00:00:00")
milis <- function(x) format(as.POSIXct(x, format="%Y-%m-%d %H:%M:%S", tz="GMT",origin="1970-01-01 00:00:00"),"%s")


### Trade list
trades<-read.table("/home/jbg/work/myEclipse/myR/produceTimeSeriesAndTradesAndAverages/trades_2682.txt",sep="\t",stringsAsFactors=F)

lgth <- length(trades[,1])

mydat <- trades[1:lgth,]
x <- as.matrix(mydat)

### Basic infos
val1 <- strsplit(x[1], ' ')[[1]]
ccy <- val1[3]

### Extract from time series
l <- 0
p <- 0
m <- 0
time <- strptime('2007-01-01 00:00:00',format='%Y-%m-%d %H:%M:%S', tz="CET")
for (i in 1:lgth) {
	vals <- strsplit(x[i], ' ')[[1]]
	# Price
	ask <- as.numeric(substring(vals[9],1, nchar(vals[9])-1))
	bid <- as.numeric(substring(vals[8],2, nchar(vals[8])-1))
	m <- c(m, (bid+ask)/2)
	# Trade
	l <- c(l, vals[5])
	p <- c(p, vals[7])
	# Times
	dt1 <- paste(vals[11], ' ' , vals[12], ' ', substring(vals[15],1,4), sep="")
	dt <- as.Date(dt1,format='%B %d %Y')
	date <- strptime(paste(dt, ' ', vals[13],sep=""),format='%Y-%m-%d %H:%M:%S')
	date.ct <- as.POSIXct(date,tz="CET")
	time <- c(time, format(date.ct,tz="GMT",usetz=TRUE))
}

l <- l[2:length(l)]
p <- p[2:length(p)]
m <- m[2:length(m)]
time <- time[2:length(time)]
start <- time[1]
stop <- time[length(time)]

res <- 0.2
res <- floor(as.numeric(difftime(stop, start, units="days")) * res)


###Averages
L <- as.numeric(l[1])
S <- -as.numeric(l[1])*as.numeric((p[1]))
av <- as.numeric(p[1])

for (i in 2:lgth) {
	vals <- strsplit(x[i], ' ')[[1]]
	L <- L + as.numeric(l[i])
	S <- S -as.numeric(l[i])*as.numeric((p[i]))
	if (L!=0) {
		av <- c(av, -S/L)
	}
	else {
		av <- c(av, p[i])
	}
	if (vals[2] == 'PROFIT') {
		L <- 0
		S <- 0
	}
}


### Price curve
system(paste('./aux.sh', ' ', ccy, ' \'' , start, '\' \'', stop, '\' ', res , sep=""))

price<-read.table("/home/jbg/work/myEclipse/myR/produceTimeSeriesAndTradesAndAverages/mySQLresult.dat",sep="\t",stringsAsFactors=F)

price<-price[2:length(price[,2]),]

bid <- as.numeric(price[,2])
ask <- as.numeric(price[,3])
mid <- (bid+ask)/2

st<-min(as.numeric(milis(price[,1])))
en<-max(as.numeric(milis(price[,1])))


mi=min(as.numeric(mid))*0.9999
ma=max(as.numeric(mid))*1.0001

name1 <- paste(ccy, '-trades.pdf',sep="")


### Plot curve
pdf(name1,w=12,h=9)

par(mfrow=c(1,1))
par(mar=c(3,3,3,4)) 

#plot(milis(price[,1]),mid,type='l',xlim=c(st,en),ylim=c(mi,ma),xlab='',ylab='Mid price', xaxt='n', yaxt='n')
plot(milis(price[,1]),bid,type='l',xlim=c(st,en),ylim=c(mi,ma),xlab='',ylab='Price', xaxt='n', yaxt='n', lty=1, col='grey30')
par(new=T)
plot(milis(price[,1]),ask,type='l',xlim=c(st,en),ylim=c(mi,ma),xlab='',ylab='Price', xaxt='n', yaxt='n', lty=1, col='grey50')

fl <-floor(mi*100)/100
cei <- ceiling(ma*100)/100
sequ <- seq(fl, cei, by=(cei-fl)/6);
axis(2,at = sequ, labels = format(sequ, digits=6))

sequ <- seq(dat(price[1,1]), dat(price[length(price[,1]),1]), by='1 day');
sequ <- sequ[-1]
sequ <- sequ[-6]
axis(1, at = as.numeric(milis(sequ)), col='black', labels = format(sequ, format='%d') );

start <- dat(price[1,1])
end <- dat(price[length(price[,1]),1])

axis(1, at = as.numeric(milis(price[1,1])), labels = format(start), padj=1.5) 
axis(1, at = as.numeric(milis(price[length(price[,1]),1])), labels = format(end), padj=1.5) 

### Plot trade
for (i in 1:length(m)) {
	par(new=T)
	plot(c(milis(time[i]),milis(time[i])),c(as.numeric(av[i]),as.numeric(p[i])),type='l',xlim=c(st,en),ylim=c(mi,ma), col='black', xlab='', ylab='', xaxt='n', yaxt='n', lty=2, lwd=2)
}

par(new=T)
plot(milis(time),av,type='p',xlim=c(st,en),ylim=c(mi,ma), col='red', pch=20, xlab='', ylab='', xaxt='n', yaxt='n',cex=1.5)

par(new=T)
plot(milis(time),p,type='p',xlim=c(st,en),ylim=c(mi,ma), col='blue', pch=21, xlab='', ylab='', xaxt='n', yaxt='n' ,cex=1.3)


dev.off()

It assumes the following trade file in /home/jbg/work/myEclipse/myR/produceTimeSeriesAndTradesAndAverages/trades_2682.txt

CTH Init AUD_USD 0.15: 1.0E-4 @ 0.97627 (0.97609, 0.97627, Tue Jul 22 14:30:34 CEST 2008) id=2674
CTH Upd AUD_USD 0.15: 1.0E-4 @ 0.97476 (0.97458, 0.97476, Tue Jul 22 15:30:25 CEST 2008) id=2674 0
CTH Upd AUD_USD 0.15: 2.0E-4 @ 0.97319 (0.97301, 0.97319, Tue Jul 22 15:56:32 CEST 2008) id=2674 0
...

And a bash script aux.sh:

#!/bin/bash 

if [ $# -lt 4 -o $# -gt 4 ]
then
        echo "Usage: sh make.sh [instrument] [from] [to] [resolution: 1 high, ..., 10 low]"
        exit 1
fi

export INST=$1
export RES=$4
export START=$2
export END=$3


#price curve
echo "select time, bid, ask from $INST where time > '$START' AND time < '$END' and quoteId%'$RES'=0 order by time asc;"
	echo "select time, bid, ask from $INST where time > '$START' AND time < '$END' and quoteId%'$RES'=0 order by time asc;" | mysql -uUSR -pPWD -hH tickdb > mySQLresult.dat 

You can get some sample data from

http://www.olsen.ch/more/datasets/

or the attached example file

http://j-node.homeip.net/tech_wiki/images/b/be/MySQLresult.txt

Find the Longest Time Difference in a Table of Values

From a list like this

[USD_JPY,; 0.15,; pos-1331]:; INIT; -1.0E-4; 118.954; Tue; Feb; 27; 15:27:38; 2007); id=2612
[USD_JPY,; 0.15,; pos-1331]:; PROFIT; 1.0E-4; 118.917; Tue; Feb; 27; 15:43:41; 2007); id=2612

find the longest time interval and the associated id using this

dat <- read.table("/home/me/trades.txt",sep=";",stringsAsFactors=F)

ids <- dat[,12]

realIds <- as.numeric(strsplit(ids[1], '=')[[1]][2])
for (i in 2:length(ids)) {
	realIds <- c(realIds, as.numeric(strsplit(ids[i], '=')[[1]][2]))
}

ind <- order(realIds)

j <- 1
from <- ''
to <- ''
problem <- 0
dt <- 0
myId <- 0
while (j < length(realIds) - 1) {
	if (abs(as.numeric(realIds[ind[j]]) - as.numeric(realIds[ind[j+1]])) > 0) {
		problem <- c(problem, realIds[j])
		j <- j + 1
	}
	
	yr <- dat[ind[j],11]
	mo <- dat[ind[j],8]
	time <- paste(substring(mo, 2, nchar(mo)), ' ', dat[ind[j],9], '', dat[ind[j],10], '', substring(yr, 1, nchar(yr)-1), sep="")
	dateFrom <- as.POSIXct(time,format='%b %d %H:%M:%S %Y',tz="CET")
	sFrom <- format(dateFrom,"%s")
	
	yr <- dat[ind[j+1],11]
	mo <- dat[ind[j+1],8]
	time <- paste(substring(mo, 2, nchar(mo)), ' ', dat[ind[j+1],9], '', dat[ind[j+1],10], '', substring(yr, 1, nchar(yr)-1), sep="")
	dateTo <- as.POSIXct(time,format='%b %d %H:%M:%S %Y',tz="CET")
	sTo <- format(dateTo,"%s")
	myId <- c(myId, realIds[ind[j]])
	
	dt <- c(dt, as.numeric(as.numeric(sTo)-as.numeric(sFrom)))
	#from <- c(from, dat[ind[j],12])
	#to <- c(to, dat[ind[j+1],12])
	j <- j + 2
}

days <- max(dt) / 60 / 60 /24
ddId <- myId[which(dt == max(dt))]

Exposure


From a file with entries like

[AUD_USD,; 0.15,; pos-1]:; INIT; -1.0E-4; 0.7407; Thu; Dec; 01; 09:54:35; 2005); id=1
[AUD_USD,; 0.3,; pos-1]:; INIT; -1.0E-4; 0.7428; Thu; Dec; 01; 18:29:11; 2005); id=1

compile the graph with:

dat <- read.table("/home/work/trades.txt",sep=";",stringsAsFactors=F)

lgth <- length(dat[[1]])

str2date <- function(t) as.POSIXct(t, format="%b %d %H:%M:%S %Y", tz="GMT",origin="1970-01-01 00:00:00")

# Thresh
thr <- dat[[2]][1]

for (i in 2:lgth) {
  thr <- c(thr, dat[[2]][i] )
}
thr <- unique(thr)


# Exp
exp <- list()
mat <- matrix(0, nrow=lgth, ncol=length(thr))
date.str <- data.frame( xDate = rep(str2date("Jan 1 00:00:00 1970"),lgth))

for (i in 1:lgth) {
  exp <- c(exp, as.numeric(dat[[5]][i]) )
  
  t <- dat[[2]][i]
  mat[i, which(t==thr)] = as.numeric(dat[[5]][i])
  
  mo <- dat[[8]][i]
  date <- dat[[9]][i]
  time <- dat[[10]][i]
  yr <- dat[[11]][i]
  time <- paste(substring(mo, 2, nchar(mo)), ' ', date, '', time, '', substring(yr, 1, nchar(yr)-1), sep="")
  date.str$xDate[i] = str2date(time)
 }
exppf <- cumsum(exp)

# Plot
plot(exppf,type='l')

plot(exp, type='l')

# Time axis
pdf(paste("~/exp.pdf",sep="")) #width=20,height=6);
par(mar=c(4.5,4.5,3,5))
col <- c('cyan','gray60','green','blue','black','pink','orange','yellow','magenta','darkgreen','gray')
maxy <- max(exppf) #max(cumsum(mat[,]))
miny <- min(exppf) #min(cumsum(mat[,]))
for (i in 1:length(thr)) {
  plot(date.str$xDate, cumsum(mat[,i]), ylim=c(miny, maxy), type='l', col=col[i], xlab='', ylab='', yaxt='n', xaxt='n')
  par(new=T)
}
legend("topleft", legend=thr, lty=matrix(1, nrow=1, ncol=length(thr)), col=col, cex=1)
par(new=T)
plot(date.str$xDate, exppf, type='l', ylim=c(miny, maxy), ylab='Exposure', xlab='')
par(new=T)
plot(date.str$xDate, dat[[6]], type='l', col='red', xlab='Time', ylab='', yaxt='n', xaxt='n', main=dat[[1]][1])
axis(4)
mtext('Price', side=4, line=4)
dev.off()

Variants

From

[AUD_USD,; 0.15,; pos-1]:; INIT; -1.0E-4; 0.7407; Thu; Dec; 01; 09:54:35; 2005); id=1
[AUD_USD,; 0.15,; pos-1]:; PROFIT; 1.0E-4; 0.7403; Thu; Dec; 01; 10:03:57; 2005); id=1

do

thresh <- '0.15'
dat <- read.table("/home/work/trades0.15.txt",sep=";",stringsAsFactors=F)
minExp <- 0.1
### CODE
str2date <- function(t) as.POSIXct(t, format="%b %d %H:%M:%S %Y", tz="GMT",origin="1970-01-01 00:00:00")
date2millis <- function(x) format(as.POSIXct(x, format="%b %d %H:%M:%S %Y", tz="GMT",origin="1970-01-01 00:00:00"), format="%s", tz="GMT")

lgth <- length(dat[[1]])

# Thresh
id <- dat[[12]][1]

for (i in 2:lgth) {
  id <- c(id, dat[[12]][i] )
}
id <- unique(id)


# Exp
mat <- matrix(0, nrow=lgth, ncol=length(id))

date.str <- data.frame( xDate = rep(str2date("Jan 1 00:00:00 1970"),lgth))
#date.millis <- array()

for (i in 1:lgth) {
	t <- dat[[12]][i]
  	mat[i, which(t==id)] = as.numeric(dat[[5]][i])
  	mo <- dat[[8]][i]
  	date <- dat[[9]][i]
  	time <- dat[[10]][i]
  	yr <- dat[[11]][i]
  	time <- paste(substring(mo, 2, nchar(mo)), ' ', date, '', time, '', substring(yr, 1, nchar(yr)-1), sep="")
	date.str$xDate[i] = str2date(time)
	#date.millis[[i]] = as.numeric(date2millis(time))
}
exp <- list()
for (i in 1:lgth) {
	exp[[i]] = sum(mat[i,])
}
exppf <- cumsum(exp)
maxy <- max(exppf)
miny <- min(exppf)


# Plot
pdf(paste("~/expThr", thresh, ".pdf",sep="")) #width=20,height=6);
par(mar=c(4.5,4.5,3,5))
col <- c('cyan','green','blue','pink','orange','yellow','magenta','darkgreen')
leg <- list()
myCol <- list()
myI <-1
for (i in 1:length(id)) {
	if (max(cumsum(mat[,i])) > minExp || min(cumsum(mat[,i])) < -minExp) {
  		plot(date.str$xDate, cumsum(mat[,i]), ylim=c(miny, maxy), col=col[myI%%length(col)+1], type='l', xlab='', ylab='', xaxt='n', yaxt='n', xaxt='n')
 		par(new=T)
		leg[[myI]] = id[i]
		myCol[[myI]] = (col[myI%%length(col)+1])
		myI <- myI +1
	}
}
legend("topleft", legend=leg, lty=matrix(1, nrow=1, ncol=length(myCol)), col=as.character(myCol), cex=1)
par(new=T)
plot(date.str$xDate, exppf, type='l', ylim=c(miny, maxy), ylab='Exposure', xlab='')
par(new=T)
plot(date.str$xDate, dat[[6]], type='l', col='red', xlab='Time', ylab='', yaxt='n', xaxt='n', main=paste(dat[[1]][1], thresh,sep=''))
axis(4)
mtext('Price', side=4, line=4)
dev.off()


#### Zoom
pdf(paste("~/expThrZoom", thresh, ".pdf",sep="")) #width=20,height=6);
par(mar=c(4.5,4.5,3,5))
start <- which(date.str$xDate>=str2date("Jul 05 0:0:0 2008"))
stop <- which(date.str$xDate<=str2date("Sep 15 0:0:0 2008"))
axseq <- seq(date.str$xDate[min(start)], date.str$xDate[max(stop)], by='7 days')
indices <- seq(min(start), max(stop), by=1)

leg <- list()
myCol <- list()
myI <-1
checkMyId <- array()
for (i in 1:length(id)) {
	if (max(cumsum(mat[,i])) > minExp || min(cumsum(mat[,i])) < -minExp) {
		plot(date.str$xDate[indices], cumsum(mat[indices,i]), ylim=c(miny, maxy), col=col[myI%%length(col)+1], type='l', xlab='', ylab='', xaxt='n', yaxt='n', xaxt='n')
		par(new=T)
		leg[[myI]] = id[i]
		myCol[[myI]] = (col[myI%%length(col)+1])
		checkMyId[[myI]] = i;
		myI <- myI +1
	}
}
legend("topleft", legend=leg, lty=matrix(1, nrow=1, ncol=length(myCol)), col=as.character(myCol), cex=1)
par(new=T)
plot(date.str$xDate[indices], exppf[indices], type='l', ylim=c(miny, maxy), ylab='Exposure', xlab='', xaxt='n')
axis(1, at = axseq, col='black', labels = format(axseq, format='%Y-%m-%d') );
par(new=T)
plot(date.str$xDate[indices], dat[[6]][indices], type='l', col='red', xlab='Time', ylab='', yaxt='n', xaxt='n', main=paste(dat[[1]][1], thresh,sep=''))
dev.off()

From

SLC [AUD_USD, 0.7]: FACTOR: factor=1.0470715806034647; B=-1.8496897492911895; C=2.010125989779137 [N=6.720679513093938 -> 7.037032520504592; 0.7]

do

dat <- read.table("/home/jme/sl.txt",sep=" ",stringsAsFactors=F)

lgth <- length(dat[[1]])

# Thresh
thr <- dat[[3]][1]

for (i in 2:lgth) {
  thr <- c(thr, dat[[3]][i] )
}
thr <- unique(thr)


# Exp
val <- matrix(0, nrow=lgth/length(thr), ncol=length(thr))
factor <- matrix(0, nrow=lgth/length(thr), ncol=length(thr))
ind <- matrix(1, nrow=1, ncol=length(thr))

for (i in 1:lgth) {
  	for (t in thr) {
    	if (t == dat[[3]][i]) {
			myi <- ind[1, which(t==thr)]
      		val[myi, which(t==thr)] = as.numeric( substring(dat[[8]][i], 4, nchar(dat[[8]][i])) )
			factor[myi, which(t==thr)] = as.numeric( substring(dat[[5]][i], 8, nchar(dat[[5]][i])-1) )
			ind[1, which(t==thr)] <- ind[1, which(t==thr)] + 1
      	}
  	}
}

# Plot
pdf(paste("/home/me/sl_factor.pdf",sep="")) #width=20,height=6);
col <- c('cyan','gray60','green','blue','black','pink','orange','yellow','magenta','darkgreen','gray')
maxy <- max((factor[,]))
miny <- min((factor[,]))
for (i in 1:length(thr)) {
	plot(factor[,i], ylim=c(miny, maxy), type='l', col=col[i], ylab='Factor', main=dat[[2]][1])
	par(new=T)
}
legend("topleft", legend=thr, lty=matrix(1, nrow=1, ncol=length(thr)), col=col, cex=1)
dev.off()

pdf(paste("/home/me/sl_val.pdf",sep="")) #width=20,height=6);
col <- c('cyan','gray60','green','blue','black','pink','orange','yellow','magenta','darkgreen','gray')
maxy <- max((val[,]))
miny <- min((val[,]))
for (i in 1:length(thr)) {
  plot(val[,i], ylim=c(miny, maxy), type='l', col=col[i], ylab='SL Value', main=dat[[2]][1])
  par(new=T)
}
legend("topleft", legend=thr, lty=matrix(1, nrow=1, ncol=length(thr)), col=col, cex=1)
dev.off()

Problems

Labelling Axis with Dates using axis.Date

If you get

Error in axis(side, at = z, labels = labels, ...) :
 'at' and 'labels' lengths differ, 2 != 3

for

axis.Date(1, at = Z, labels = format(Z, format='%m-%d'))

where Z and format(Z, format='%m-%d') have the same length, then the problem is that one of the values of Z lies outside the current range of the plot.
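One way around it (a sketch) is to keep only the dates that fall inside the plotted range before calling axis.Date, as the plotting script further up does with sequ:

Zin <- Z[Z >= min(T) & Z <= max(T)]   # T: the vector of dates actually plotted
axis.Date(1, at = Zin, labels = format(Zin, format='%m-%d'))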