R: Learning by Doing
From TechWiki
R by example.
For a quick start to the R statistical computing program, goto R...
Contents |
Used Datasets
- http://j-node.homeip.net/tech_wiki/images/f/f3/Eur_usd_lb.txt
- http://j-node.homeip.net/tech_wiki/images/2/25/Eur_usd-small.txt
- http://j-node.homeip.net/tech_wiki/images/f/fa/Chf_jpy-small.txt
- http://j-node.homeip.net/tech_wiki/images/7/77/Ozone.txt
E.g., Media:Ozone.xls
R Code
As file:
Embedded:
#####Data Manipulation
## First Steps
# Load data frame
mydata <- read.table("http://j-node.homeip.net/tech_wiki/images/7/77/Ozone.txt")
series <- scan("http://j-node.homeip.net/tech_wiki/images/2/25/Eur_usd-small.txt")
myseries <- as.ts(series)
# Look at data
mydata
summary(mydata)
str(mydata)
coef(mydata)
names(mydata)
# Visualize
plot(mydata)
pairs(mydata, pch='.', gap=0)
# Objects
ls()
# Extracting elements
mydata$day
mydata[30,2]
mydata[30:35,2:3]
mydata[30,]
mydata[,2]
mydata[,'vdht']
mydata[1:30,-1]
# New Columns
mydata[,'lvdh'] <- log(mydata[,'vdht'])
## Creating Objects
#Concatenating Strings
str <- 'Fractional Brownian Motion'
paste(c(str, " PDF"), collapse=""))
#Vectors
cat("name1 name2 name3", "2 3 5 7", "11 13 17", "111 222 333", file="example.dat", sep="\n")
vec <- numeric(10)
vec <- 1:5
vec2 <- c(1,3,2)
vec <- c(vec2, 4,5,6)
vec <- c("A","B","C")
vec <- mydata[,2]
vec <- seq(0, 7, by=1.2)
vec <- seq(-1, 1, length=10)
vec <- rep(c(1,2,3), 5)
vec <- rep(c(1,2,3), length=8)
vec <- (1:5)>3
vec <- seq(0, 7, by=1.2)
vec[c(1,3,6)]
vec[-2]
vec[-c(1,3,6)]
which(vec == 2.4)
which.max(vec)
match(vec,6.0)
# Matrices
x <- 1:4
y <- 5:8
mat <- cbind(x, y)
mat <- rbind(x, y, x+y)
mat <- diag(rep(1,10))
mat <- as.matrix(mydata)
mat1 <- matrix(2, nrow=2, ncol=3)
mat2 <- matrix(3, nrow=3, ncol=4)
mat1 %*% mat2
t(mat1)
mat1[,3] <- log(mat1[,1])
mat1[2,] <- log(mat1[1,])
mat1[mat1>1]
mat1[mat1>1 & mat1<3]
(2:5) ^ c(2, 3, 1, 0)
# Checking
is.matrix(mydata)
is.data.frame(mydata)
is.ts(mydata)
mode(mydata)
class(mydata)
##### Plotting
# Matrix
matplot(mat)
# Special
par(mfrow=c(2,3))
par(new=TRUE)
# Plot
plot(x, y, xlab = 'x-axis', ylab = 'y-axis', main = 'Title', cex=0.6, col="gray", ylim=c(1, 3))
# Additional Features
B <- 1.0
err = 0.1
y <- 1
x1 <- -3
x2 <- -2
x3 <- -1
x4 <- 0
txt <- c("B:", B, " +/- ", err)
text(c(x1, x2, x3, x4), y, txt)
legend(0, 0, legend=(c("A", "B", "C")), lty=c(1,3,5), col=c(1,2,3), cex=1)
rug(x)
abline(0.6,0.97)
x <- seq(-4,4,by=0.5)
plot(x,dnorm(x,mean=0,sd=0.9))
lines(x,dnorm(x ,mean=0,sd=0.9))
# Handling Plots
X11()
pdf('name.pdf')
#postscript('name.ps')
dev.off()
##### Defining Functions
myFct <- function(one, two, three) {
f.max <- max(one, two, three)
f.vec <- c(one, two, three)
c(max=f.max, vec=f.vec)
}
val <- myFct(1,2,3)
val[1]
val['max']
myFct <- function(one, two, three) {
return(sum(one, two, three))
}
numF <- function(x) x + 4 * cos(7*x)
##### Simple Functions
vec <- rep(c(1,2,3), length=8)
sqrt(vec)
max(vec)
sum(vec)
mean(vec)
sd(vec)
var(vec)
range(vec)
apply(mydata, 1, mean)
apply(mydata, 2, mean)
##### Advanced Functions
# Histogram
hist(myseries, breaks=500)
# Density (PDF)
plot(density(myseries))
plot(density(myseries, bw=0.00000001, kernel = "epanechnikov", n=400,from =-.1, to = .1, adjust = 1, cut = 1))
quantile(myseries)
# CDF
plot(ecdf(myseries))
# Correlation
acf(myseries)
series2 <- scan("http://j-node.homeip.net/tech_wiki/images/f/fa/Chf_jpy-small.txt")
ccf(myseries, series2)
## Linear Regression: Scaling Laws
See more below...
dat <- read.table("http://j-node.homeip.net/tech_wiki/images/f/f3/Eur_usd_lb.txt")
ldat <- log(dat)
# Select interval
Min <- -3.75
Max <- 1
X <- ldat$V1[ldat$V1 >= Min & ldat$V1 <= Max]
Y <- ldat$V2[ldat$V1 >= Min & ldat$V1 <= Max]
# Compute and plot
plot(X, Y, xlab = 'log(delta x) [%]', ylab = 'log(delta x) [%]', main = 'EUR_USD Lookback')
result <- lm(Y~X)
coefficients(result)
summary(result)
result$coef
coef(result)
result$resid
resid(result)
summary(result)$coefficients[1,1]
abline(result$coef)
plot(result)
# Error propagation
A <- summary(result)$coefficients[1,1]
dA <- summary(result)$coefficients[1,2]
B <- summary(result)$coefficients[2,1]
dB <- summary(result)$coefficients[2,2]
C <- exp(-A/B)
C.err <- sqrt((-C/B*dA)^2+(A/B^2*C*dB)^2)
# Formatting the output
Etxt <- c("E =", B, "+/-", dB)
Ctxt <- c("C =", C, "+/-", C.err)
xmin <- min(X)
ymax <- max(Y)
xrange <- max(X) - min(X)
yrange <- max(Y) - min(Y)
xmin <- xmin - xrange/xmin*0.1
text(c(xmin, xmin + xrange*0.18, xmin + xrange*0.36, xmin + xrange*0.55), ymax, Etxt)
text(c(xmin, xmin + xrange*0.21, xmin + xrange*0.42, xmin + xrange*0.62), ymax-yrange*0.07, Ctxt)
## More Regression
plot(mydata)
# Outlier
out <- which.max(mydata[,'wdsp'])
mydata <- mydata[-out,]
# Transform Data
newdata <- subset(transform(mydata,logupo3=log(upo3)),select=-upo3)
# Linear
lin <- lm(logupo3 ~ ., data=newdata)
# Nonparametric: Y = m(X) + e
# Local Polynomial (loess)
lp <- loess(Y ~ X)
# Smoothing Splines => Degrees of Freedom
df <- lp$trace.hat
ss <- smooth.spline(X, Y,df=df)
# (Nadaraya-Watson) Kernel Regression => Bandwith
kr <- ksmooth(X, Y, bandwidth=0.1)
# Check
plot(X, Y, main="Nonparametric Regression", cex=0.6, col="gray")
lines(kr$x,kr$y, col=1)
lines(X, predict(lp, x=X),lty=3, col=2, cex=0.5)
lines(X, predict(ss, x=X)$y, lty=5, col=3)
legend(0, 0, legend=(c("Kernel", "Local", "Splines")), lty=c(1,3,5), col=c(1,2,3), cex=1)
rug(X)
# Flexible Regression: Additive Models
library(mgcv)
genaddmod <- gam(logupo3 ~ s(vdht)+s(wdsp)+s(hmdt)+s(sbtp)+s(ibht)+s(dgpg)+s(ibtp)+s(vsty)+s(day), data=newdata)
plot(genaddmod)
## Evaluation
plot(fitted(lin), resid(lin))
qqnorm(resid(lin))
qqline(lin$res)
plot(lin)
plot(predict(lp), Y)
plot(predict(ss)$y, Y)
##### Misc
example(lm)
?lm
set.seed(33)
source('./tmp.R')
cat("summary(result)", summary(result)$coef)
# Loops
for (i in 1:10) {
# ...
}
myStr <- c('a\n', 'bb\n', 'ccc\n')
for (str in myStr) {
cat(str)
}
# Assignment Example
inc <- matrix(runif(100), nrow=50, ncol=2,byrow=FALSE)
new <- 1
for (i in 2:length(inc[,1])) {
new[i] <- new[i-1] + inc[i,1]
}
search()
install.packages("earth")
library("earth")
q()
### MySQL
# install.packages("RMySQL")
# Execute MySQL for an array of strings
library(RMySQL)
drv <- dbDriver("MySQL")
con <- dbConnect(drv, "db", "pwd", "usr", "host")
ccy <- c('AUD_JPY', 'USD_JPY')
sink("ccy.txt")
for (instr in ccy) {
dbListTables(con)
qryStr <- paste("SELECT COUNT(*) FROM ", instr, " WHERE time >= '2003-01-01 00:00:00' and time <= '2008-01-01 00:00:00' ORDER BY time;", collapse="")
res <- dbSendQuery(con, qryStr)
data <- fetch(res, n = -1)
cat( instr, data[1,1], '\n')
dbClearResult(res)
}
sink()
dbClearResult(res)
dbDisconnect(con)
## Extentions:
# Java: http://swik.net/sjava, http://rosuda.org/JGR/
# Web-GUI: http://www.rpad.org/Rpad/
##### On and on...
# round
# sample
# solve
# step
# class
# sapply
# earth; library(earth); actuall new MARS implementation
# gam; library(mgcv)
# rbinom, rpois, rhyper
# runif, rnorm, rlnorm, rt, rF, rchisq, rgamma
Focus: Time Series Analysis
Resources
- FracSim: CRAN: FracSim, FracSim.pdf, ups-tlse.fr:FracSim
- tseries: CRAN: tseries, tseries.pdf
R Code
Simple Gaussian random walk:
n <- 1000000
rnorm(1000000,0,pi)
x <- rep(NA,n+1)
x[1] <- 11000
for (i in 1:n) {
x[i+1] <- x[i]+dx[i]
}
lx <- log(x)
dlx <- diff(lx)
Some timeDate stuff (compute distribution of deltas and extract mean and variance as double values):
t <- timeDate(data[,2]) dt <- diff.timeDate(t) dsty <- density(dt) sig <- sd(dt) mu <- mean(dt) sig <- structure(sig,class="double")[1] mu <- structure(mu,class="double")[1]
Load a time series from MySQL a table
+----+---------------------+---------+---------+ | Id | time | bid | ask | +----+---------------------+---------+---------+ | 1 | 2004-06-30 12:09:02 | 1.21532 | 1.2155 | | 2 | 2004-06-30 12:09:06 | 1.21534 | 1.21552 | | 3 | 2004-06-30 12:09:09 | 1.21524 | 1.21542 | +----+---------------------+---------+---------+
and plot mid prices and deltas (using diff()):
library(fCalendar)
library(RMySQL)
drv <- dbDriver("MySQL")
rdat <- dbConnect(drv, "table", "usr", "pwd", "host")
res <- dbSendQuery(rdat, "SELECT * FROM table where time >= '2004-08-02 00:00:00' order by Id asc LIMIT 300000;")
ts <- fetch(res, n = -1)
x <- timeDate(ts[,2])
y <- (ts[,3] + ts[,4]) / 2
par(mfrow=c(2,1))
plot.timeDate(x,y, type='l',xlab = 'Time', ylab = 'Price', main = 'Mid Prices')
plot.timeDate(x[-1],diff(y), type='l', xlab = 'Time', ylab = 'Price Change', main = 'Deltas')
Plot stuff:
plot(density(diff(log(y))), xlab = 'Price Move', ylab = 'Probability', main = 'PDF Log Returns') plot(ecdf(diff(log(y))), xlab = 'Price Move', ylab = 'Frequency', main = 'CDF Log Returns') acf(diff(log(y)), main = 'Autocorrelation Log Returns') pacf(diff(log(y)), main = 'Partial Autocorrelation Log Returns')
And using the ts object assigned above
library(tseries) irts <- irts(ts[,2],y) weekday(irts) is.weekend(irts)
Load time series from MySQL database, plot its dx and dt densities, and overlay with normal distribution with same mean and variance:
### Empirical values of (theoretical) Gaussian random walk
library(fCalendar)
label <- 'name'
## Load DB
library(RMySQL)
drv <- dbDriver("MySQL")
con <- dbConnect(drv, "db", "usr", "pwd", "host")
postscript(paste(label, '.ps', sep = ""))
par(mfrow=c(1,2))
qry <- paste('SELECT * FROM ', paste(label, ';', sep = ""), sep = "")
res <- dbSendQuery(con, qry)
data <- fetch(res, n = -1)
# Deltas
x <- (data[,3]+data[,4])/2
lx <- log(x)
llx <- (log(data[,3])+log(data[,4]))/2
t <- timeDate(data[,2])
dt <- diff.timeDate(t)
#which(dt <= 0)
#which(dt > 500)
dx <- diff(x)
dlx <- diff(lx)
dllx <- diff(llx)
dsty <- density(dt)
sigma <- sd(dt)
mu <- mean(dt)
plot(density(rnorm(1000000, mu, sigma)), xlim=c(min(dsty$x), max(dsty$x)), ylim=c(min(dsty$y), max(dsty$y)), type='p', col='red', main=label, xlab='dt')
par(new=TRUE)
plot(dsty, xlim=c(min(dsty$x), max(dsty$x)), ylim=c(min(dsty$y), max(dsty$y)), main=label, xlab='dt', lwd=2)
text(min(dsty$x) + (max(dsty$x) - min(dsty$x)) / 8, max(dsty$y) - (max(dsty$y) - min(dsty$y)) / 10, mu)
text(min(dsty$x) + (max(dsty$x) - min(dsty$x)) / 8, max(dsty$y) - (max(dsty$y) - min(dsty$y)) / 25, sigma)
dsty <- density(dx)
sig <- sd(dx)
mu <- mean(dx)
plot(density(rnorm(1000000, mu, sig)), xlim=c(min(dsty$x), max(dsty$x)), ylim=c(min(dsty$y), max(dsty$y)), type='p', col='red', main=label, xlab='dx')
par(new=TRUE)
plot(dsty, xlim=c(min(dsty$x), max(dsty$x)), ylim=c(min(dsty$y), max(dsty$y)), main=label, xlab='dx', lwd=2)
text(min(dsty$x) + (max(dsty$x) - min(dsty$x)) / 6, max(dsty$y) - (max(dsty$y) - min(dsty$y)) / 10, mu)
text(min(dsty$x) + (max(dsty$x) - min(dsty$x)) / 6, max(dsty$y) - (max(dsty$y) - min(dsty$y)) / 25, sigma)
dev.off()
# Write to file
modelSig <- 1/6769.9
sink(paste(label, '.stats', sep = ""))
cat('sigma_dx: ', abs(sig-modelSig)/modelSig *100)
sink()
Linear regression of data and inset plots of distributions:
library(gridBase)
library(Hmisc)
postscript("GRW_GRW_N_TCKS.ps", horizontal=T, width=300, height=300)
#x11()
#par(mar=c(5,5,4,2))
#par(mfrow=c(2,4))
# Get data
dat <- read.table("ts.dat")
# Approx. percentage
dat$V1 <- dat$V1*100
ldat <- log(dat)
# Select
Min <- log(0.02)
Max <- max(ldat$V1)
X <- ldat$V1[ldat$V1 >= Min & ldat$V1 <= Max]
Y <- ldat$V2[ldat$V1 >= Min & ldat$V1 <= Max]
# Compute
res <- lm(Y~X)
# Error propagation
A <- summary(res)$coefficients[1,1]
dA <- summary(res)$coefficients[1,2]
B <- summary(res)$coefficients[2,1]
dB <- summary(res)$coefficients[2,2]
C <- exp(-A/B)
C.err <- sqrt((-C/B*dA)^2+(A/B^2*C*dB)^2)
slEmpRSx <- function(t) (t/C)^B
# Plot
plot(dat, xlim=c(exp(min(X)), exp(max(X))), ylim=c(exp(min(Y)), exp(max(Y))), xlab = 'XLAB', ylab = 'YLAB', main = 'TITLE', cex.axis=1.5, log='xy')
res <- lm(Y~X)
#grid(col='black')
#minor.tick(nx=10, ny=10, tick.ratio=0.6)
par(new=TRUE)
plot(slEmpRSx, exp(min(X)), exp(max(X)), ylim=c(exp(min(Y)), exp(max(Y))), log='xy', col='red', cex.axis=2, lwd=2, yaxt="n", xaxt="n", xlab = 'XLAB', ylab = 'YLAB', main = 'TITLE')
arrows(0.08,190,0.1,slEmpRSx(0.1),angle=15,code=2,lwd=2)
arrows(2,200,3,slEmpRSx(3),angle=15,code=2,lwd=2)
# Inset Plot
vp <- baseViewports()
pushViewport(vp$inner,vp$figure,vp$plot)
pushViewport(viewport(x=0.1,y=0.95,width=.38,height=.38,just=c("left","top")))
par(plt=gridPLT(),new=T)
grid.rect(gp=gpar(lwd=1,col="black"))
ccy1 <- read.table("ts.obs")
dsty <- density(ccy1[,1])
maxX <- max(dsty$x)
maxY <- max(dsty$y)
inc <- (maxY -min(dsty$y))/15
plot(dsty, xlab = '', ylab = '', main = '')
text(maxX,maxY-inc*0.6,'ITITLE', 1)
text(maxX,maxY-2*inc,'I1', 1)
rug(ccy1$V1)
popViewport(4)
# Inset Plot
#vp <- baseViewports()
pushViewport(vp$inner,vp$figure,vp$plot)
pushViewport(viewport(x=0.95,y=0.1,width=.38,height=.38,just=c("right","bottom")))
par(plt=gridPLT(),new=T)
grid.rect(gp=gpar(lwd=1,col="black"))
ccy2 <- read.table("ts.obs")
dsty <- density(ccy2[,1])
maxX <- max(dsty$x)
maxY <- max(dsty$y)
inc <- (maxY -min(dsty$y))/15
plot(dsty, xlab = '', ylab = '', main = '')
text(maxX,maxY-inc*0.6,'ITITLE', 1)
text(maxX,maxY-2*inc,'I2', 1)
rug(ccy2$V1)
popViewport(4)
# Formatting the output
Etxt <- c("E =", B, "+/-", dB)
Ctxt <- c("C =", C, "+/-", C.err)
#sink(file="MAIN.para", append=TRUE)
cat("NAME", "\t", Etxt, "\t", Ctxt, "\t", "#data ", length(X), "\trange ", exp(Min), " - " , exp(Max), "\n")
#sink()
dev.off()
R and Bash
V1
Sometimes you need to automize the creation of R-scripts. Here, in a directory containing the files
AUD_JPY-DELTA_DC-2002-12-01_00:00:00-2007-12-01_00:00:00.dat EUR_USD-DELTA_DC-2002-12-01_00:00:00-2007-12-01_00:00:00.dat ...
which consist of x and y values
1.0E-4 19.479773500345686 1.0253151205244288E-4 19.69857676560725 1.0512710963760242E-4 19.97171414407603 ...
an estimation using a linear model is required.
The following bash script
#!/bin/bash
# Data
DAT=`ls $DIR | grep '.dat'`
# Scaling law name
SL=`echo {$DAT[1]} | cut -f 2 -d '-'`
rm $SL.R
# Header
echo 'postscript("'$SL'.ps")' >> $SL.R
echo 'par(mar=c(5,5,4,2))' >> $SL.R
echo 'par(mfrow=c(2,3))' >> $SL.R
# Loop
for n in $DAT
do
# Instrument
CCY=`echo $n | cut -c1-7`
# File to source in R
echo 'source("'$CCY'.R")' >> $SL.R
# Modify template to plot current scaling law
sed -e "s/###DAT###/$n/g" -e "s/###CCY###/$CCY/" -e "s/###X###/dx (%)/" -e "s/###Y###/os dt (sec)/"
-e "s/###MIN###/min(ldat"'$V1'")/g" -e "s/###MAX###/max(ldat"'$V1'")/g" -e "s/###XMULT###/100/g"
-e "s/###YMULT###/1/g" < ./template.R > $CCY.R
done
# Footer
echo 'dev.off()' >> $SL.R
will take the R template file (template.R)
### Data
dat <- read.table("###DAT###")
lab <- '###CCY###'
xlabT <- '###X###'
ylabT <- '###Y###'
### Estimate
# Approx. percentage
dat$V1 <- dat$V1*###XMULT###
dat$V2 <- dat$V2*###YMULT###
ldat <- log(dat)
Min <- ###MIN###
Max <- ###MAX###
X <- ldat$V1[ldat$V1 >= Min & ldat$V1 <= Max]
Y <- ldat$V2[ldat$V1 >= Min & ldat$V1 <= Max]
res <- lm(Y~X)
# Error propagation
A <- summary(res)$coefficients[1,1]
dA <- summary(res)$coefficients[1,2]
B <- summary(res)$coefficients[2,1]
dB <- summary(res)$coefficients[2,2]
C <- exp(-A/B)
C.err <- sqrt((-C/B*dA)^2+(A/B^2*C*dB)^2)
slEmpRSx <- function(t) (t/C)^B
### Plot
plot(dat, xlim=c(exp(min(X)), exp(max(X))), ylim=c(exp(min(Y)), exp(max(Y))), type='o', ylab = ylabT, xlab = xlabT, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black', log='xy', yaxt="n", xaxt="n")
par(new=TRUE)
plot(slEmpRSx, exp(min(X)), exp(max(X)), ylim=c(exp(min(Y)), exp(max(Y))), log='xy', col='red', ylab = ylabT, xlab = xlabT, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=2, yaxt="n", xaxt="n")
minv <- round(exp(min(Y)),digits=2)
maxv <- round(exp(max(Y)),digits=1)
axis(2, at = format(minv, digits=1,sci = TRUE), cex.axis=2)
axis(2, at = format(exp(max(Y)), digits=1,sci = TRUE), cex.axis=2, labels=TRUE)
minx <- round(exp(min(X)),digits=2)
maxx <- round(exp(max(X)),digits=0)
axis(1, at = minx, cex.axis=2)
axis(1, at = maxx, cex.axis=2)
# Formatting the output
Etxt <- paste(c("E = ", format(B, digits = 5), " +/- ", format(dB*4, digits = 2)), collapse="")
Ctxt <- paste(c("C = ", format(C, digits = 4), " +/- ", format(C.err*2, digits = 3)), collapse="")
text(exp(Min), exp(max(Y)), Etxt, col='red', cex=1.5, 0)
text(exp(Min), exp(max(Y)) - exp(max(Y))/1.8, Ctxt, col='red', cex=1.5, 0)
and replace all the ### tags with custom values, and generate R files called
AUD_JPY.R EUR_USD.R ...
for every .dat file in the directory. In addition, a master R file is created, called
DELTA_DC.R
which contains all the custom R files
postscript("DELTA_DC.ps")
par(mar=c(5,5,4,2))
par(mfrow=c(2,3))
source("AUD_JPY.R")
source("AUD_USD.R")
...
dev.off()
and will generate a postscript file, containing all the individual plots. To generate this, start an R session in the directory, and type
source('DELTA_DC.R')
V2
To automatically generate this R file, displaying multiple curves with a legend on one plot
postscript("X-single.ps")
par(mar=c(5,5,4,2))
par(mfrow=c(1,1))
maxY <- log(7)
minY <- log(0.005)
source("AUD_JPY.R")
par(new=TRUE)
source("AUD_USD.R")
par(new=TRUE)
source("CHF_JPY.R")
par(new=TRUE)
source("EUR_AUD.R")
par(new=TRUE)
source("EUR_CHF.R")
par(new=TRUE)
source("EUR_GBP.R")
par(new=TRUE)
source("EUR_JPY.R")
par(new=TRUE)
source("EUR_USD.R")
par(new=TRUE)
source("GBP_CHF.R")
par(new=TRUE)
source("GBP_JPY.R")
par(new=TRUE)
source("GBP_USD.R")
par(new=TRUE)
source("GRW_GRW.R")
par(new=TRUE)
source("USD_CHF.R")
par(new=TRUE)
source("USD_JPY.R")
par(new=TRUE)
legend(exp(minX), exp(maxY), legend=(c('AUD_JPY','AUD_USD','CHF_JPY','EUR_AUD','EUR_CHF','EUR_GBP','EUR_JPY','EUR_USD','GBP_CHF','GBP_JPY','GBP_USD','GRW_GRW','USD_CHF','USD_JPY')) , col=c('cyan','gray60','green','blue','black','pink','orange','yellow','magenta','darkgreen','gray','red','wheat','salmon3'), lty=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1), cex=1)
dev.off()
these files were used, i.e., first execute this bash script
#!/bin/bash
# Data
DAT=`ls $DIR | grep '.dat'`
# Scaling law name
SL=`echo {$DAT[1]} | cut -f 2 -d '-'`'-single'
# Header
echo 'postscript("'$SL'.ps")' >> $SL.R
echo 'par(mar=c(5,5,4,2))' >> $SL.R
echo 'par(mfrow=c(1,1))' >> $SL.R
#echo 'maxY <- log(3000000)' >> $SL.R
#echo 'minY <- log(1)' >> $SL.R
echo 'maxY <- log(7)' >> $SL.R
echo 'minY <- log(0.005)' >> $SL.R
# Color vector
COL=(cyan gray60 green blue black pink orange yellow magenta darkgreen gray red wheat salmon3)
# Loop
i=0
for n in $DAT
do
# Instrument
CCY=`echo $n | cut -c1-7`
LAB[i]=$CCY
# File to source in R
echo 'source("'$CCY'.R")' >> $SL.R
# Modify template to plot current scaling law
sed -e "s/###DAT###/$n/g" -e "s/###CCY###/$CCY/" -e "s/###X###/dx (%)/" -e "s/###Y###/N (dc)/" -e "s/###MIN###/min(ldat"'$V1'")/g" -e "s/###MAX###/max(ldat"'$V1'")/g" -e "s/###XMULT###/100/g" -e "s/###YMULT###/100/g" -e "s/###COL###/${COL[$i]}/g" < ./templateSingle.R > $CCY.R
echo 'par(new=TRUE)' >> $SL.R
i=`echo $i + 1|bc -l`
done
CV="c('${COL[0]}','${COL[1]}','${COL[2]}','${COL[3]}','${COL[4]}','${COL[5]}','${COL[6]}','${COL[7]}','${COL[8]}','${COL[9]}','${COL[10]}','${COL[11]}','${COL[12]}','${COL[13]}')"
CCV="c('`echo ${LAB[0]}`','`echo ${LAB[1]}`','`echo ${LAB[2]}`','`echo ${LAB[3]}`','`echo ${LAB[4]}`','`echo ${LAB[5]}`','`echo ${LAB[6]}`','`echo ${LAB[7]}`','`echo ${LAB[8]}`','`echo ${LAB[9]}`','`echo ${LAB[10]}`','`echo ${LAB[11]}`','`echo ${LAB[12]}`','`echo ${LAB[13]}`')"
#echo 'legend(exp(maxX-1), exp(maxY), legend=('$CCV') , col='$CV', lty=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1), cex=1)' >> $SL.R
echo 'legend(exp(minX), exp(maxY), legend=('$CCV') , col='$CV', lty=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1), cex=1)' >> $SL.R
# Footer
echo 'dev.off()' >> $SL.R
which uses the following R file (templateSingle.R) as a template
### Data
dat <- read.table("###DAT###")
lab <- '###CCY###'
xlabT <- '###X###'
ylabT <- '###Y###'
### Estimate
# Approx. percentage
dat$V1 <- dat$V1*###XMULT###
dat$V2 <- dat$V2*###YMULT###
ldat <- log(dat)
Min <- ###MIN###
Max <- ###MAX###
X <- ldat$V1[ldat$V1 >= Min & ldat$V1 <= Max]
Y <- ldat$V2[ldat$V1 >= Min & ldat$V1 <= Max]
res <- lm(Y~X)
maxX <- max(X)
minX <- min(X)
# Error propagation
A <- summary(res)$coefficients[1,1]
dA <- summary(res)$coefficients[1,2]
B <- summary(res)$coefficients[2,1]
dB <- summary(res)$coefficients[2,2]
C <- exp(-A/B)
C.err <- sqrt((-C/B*dA)^2+(A/B^2*C*dB)^2)
slEmpRSx <- function(t) (t/C)^B
### Plot
plot(slEmpRSx, exp(minX), exp(maxX), ylim=c(exp(minY), exp(maxY)), log='xy', col='###COL###', ylab = ylabT, xlab = xlabT, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=2, yaxt="n", xaxt="n")
minv <- round(exp(minY),digits=2)
maxv <- round(exp(maxY),digits=1)
axis(2, at = format(minv, digits=1,sci = TRUE), cex.axis=2)
axis(2, at = format(exp(maxY), digits=1,sci = TRUE), cex.axis=2, labels=TRUE)
minx <- round(exp(minX),digits=2)
maxx <- round(exp(maxX),digits=0)
axis(1, at = minx, cex.axis=2)
axis(1, at = maxx, cex.axis=2)
V3
Your data is in a directory below the current one which looks like this
AUD_JPY-T_MINMAX-2002-12-01_00:00:00-2007-12-01_00:00:00.dat ... USD_JPY-T_MINMAX-2002-12-01_00:00:00-2007-12-01_00:00:00.dat
Each .dat file contains two columns with data denoting the x- and y-values.
The goal is to plot a diagram with all the data in a nice and clear way:
This means, you need to mess with
- label to axis spacing,
- legends,
- plotting multiple symbols and lines at different resolution,
- logarithmically spaced minor ticks,
- find the minimal and maximal values for all the data
- etc.
To automatize this, the following bash script reads the data in the correct directory and fills the R template file with the corresponding values.
myBash.sh:
#!/bin/bash
# Data
DAT=`ls ../ | grep '.dat'`
# Scaling law name
SL=`echo {$DAT[1]} | cut -f 2 -d '-'`'-single'
rm $SL.R
# Header
#echo 'postscript("'$SL'.ps", onefile=TRUE, horizontal=TRUE)' >> $SL.R
echo 'postscript("'$SL'.ps")' >> $SL.R
echo 'par(mar=c(5,6,4,2))' >> $SL.R
echo 'par(mfrow=c(1,1))' >> $SL.R
#Min/max
i=0
for n in $DAT
do
echo 'dat'$i' <- read.table("../'$n'")' >> $SL.R
echo 'dat'$i'$V1 <- dat'$i'$V1*100' >> $SL.R
i=`echo $i + 1|bc -l`
done
echo 'minX <- min(dat0$V1, dat1$V1, dat2$V1, dat3$V1, dat4$V1, dat5$V1, dat6$V1, dat7$V1, dat8$V1, dat9$V1, dat10$V1, dat11$V1, dat12$V1, dat13$V1)' >> $SL.R
echo 'maxX <- max(dat0$V1, dat1$V1, dat2$V1, dat3$V1, dat4$V1, dat5$V1, dat6$V1, dat7$V1, dat8$V1, dat9$V1, dat10$V1, dat11$V1, dat12$V1, dat13$V1)' >> $SL.R
echo 'minY <- min(dat0$V2, dat1$V2, dat2$V2, dat3$V2, dat4$V2, dat5$V2, dat6$V2, dat7$V2, dat8$V2, dat9$V2, dat10$V2, dat11$V2, dat12$V2, dat13$V2)' >> $SL.R
echo 'maxY <- max(dat0$V2, dat1$V2, dat2$V2, dat3$V2, dat4$V2, dat5$V2, dat6$V2, dat7$V2, dat8$V2, dat9$V2, dat10$V2, dat11$V2, dat12$V2, dat13$V2)' >> $SL.R
# Color vector
COL=(cyan gray45 green blue black pink1 orange yellow3 magenta darkgreen gray80 red brown4 salmon3)
#COL=(black black black black black black black black black black black black black black)
#LT=(15 16 17 18 1 2 5 6 7 9 10 8 11 13)
#LT=(16 1 17 2 18 23 15 22 6 12 9 8 10 11)
LT=(16 21 17 24 18 23 15 22 12 25 10 8 9 11)
length=68
step=3
# Loop
i=0
for n in $DAT
do
# Instrument
CCY=`echo $n | cut -c1-7`
LAB[i]=$CCY
# File to source in R
echo 'source("'$CCY'.R")' >> $SL.R
# Modify template to plot current scaling law
sed -e "s/###DAT###/$n/g" -e "s/###CCY###/$CCY/" -e "s/###X###/XLAB/" -e "s/###Y###/YLAB/" -e "s/###MIN###/min(ldat"'$V1'")/g" -e "s/###MAX###/max(ldat"'$V1'")/g" -e "s/###XMULT###/100/g" -e "s/###YMULT###/1/g" -e "s/###COL###/${COL[$i]}/g" -e "s/###LT###/${LT[$i]}/g" -e "s/###ST###/`echo $i*$step|bc -l`/g" -e "s/###ED###/`echo $length - $i*$step |bc -l`/g" < ./templatePaper.R > $CCY.R
echo 'par(new=TRUE)' >> $SL.R
i=`echo $i + 1|bc -l`
done
CV="c('${COL[0]}','${COL[1]}','${COL[2]}','${COL[3]}','${COL[4]}','${COL[5]}','${COL[6]}','${COL[7]}','${COL[8]}','${COL[9]}','${COL[10]}','${COL[11]}','${COL[12]}','${COL[13]}')"
CCV="c('`echo ${LAB[0]}`','`echo ${LAB[1]}`','`echo ${LAB[2]}`','`echo ${LAB[3]}`','`echo ${LAB[4]}`','`echo ${LAB[5]}`','`echo ${LAB[6]}`','`echo ${LAB[7]}`','`echo ${LAB[8]}`','`echo ${LAB[9]}`','`echo ${LAB[10]}`','`echo ${LAB[11]}`','`echo ${LAB[12]}`','`echo ${LAB[13]}`')"
LLT="c(${LT[0]},${LT[1]},${LT[2]},${LT[3]},${LT[4]},${LT[5]},${LT[6]},${LT[7]},${LT[8]},${LT[9]},${LT[10]},${LT[11]},${LT[12]},${LT[13]})"
#echo 'legend(maxX, maxY, legend=('$CCV') , LT='$CV', lty=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1), cex=1.1, xjust=1)' >> $SL.R
#echo 'legend(maxX, maxY, legend=('$CCV') , LT='$CV', pch=c(1,2,3,4,5,6,7,8,9,10,11,12,13,14), cex=1.1, xjust=1)' >> $SL.R
#echo 'legend(minX, maxY, legend=('$CCV') , col='$CV', pch='$LLT', lty=rep(3,14), cex=1.2)' >> $SL.R
#echo 'legend(minX, maxY, legend=('$CCV') , col='$CV', pch='$LLT', lty=rep(0,14), cex=1.5, xjust=0)' >> $SL.R
echo 'par(lwd=1.5)' >> $SL.R
echo 'legend(minX, maxY, legend=('$CCV') , col='$CV', pch='$LLT', lty=rep(0,10), cex=1.3, xjust=0, yjust=1)' >> $SL.R
# Footer
echo 'dev.off()' >> $SL.R
The R template file is called
templatePaper.R:
### Data
dat <- read.table("../###DAT###")
lab <- '###CCY###'
xlabT <- '###X###'
ylabT <- '###Y###'
### Estimate
# Approx. percentage
dat$V1 <- dat$V1*###XMULT###
dat$V2 <- dat$V2*###YMULT###
### Plot
#plot(slEmpRSx, minX, maxX, ylim=c(minY, maxY), log='xy', type = "l", col='###COL###', ylab = ylabT, xlab = xlabT, cex.axis=1.5, lwd=1, main='TITLE', pch=###LT###, cex=0.75)
#par(new=TRUE)
start <- ###ST###
end <- ###ED###
pchi <- c(rep(NA,start), ###LT###, rep(NA,end));
plot(exp(X),exp(Y), xlim=c(minX, maxX), log='xy', ylim=c(minY, maxY), cex=1.5, lwd=2.5, col='###COL###', ylab = ylabT, xlab = xlabT, main='TITLE', pch=pchi, type='l', lty=3, bg='white', yaxt='n', xaxt='n', cex.main=2, cex.lab=1.5, mgp=c(3.5,0,0))
par(new=TRUE)
plot(exp(X),exp(Y), xlim=c(minX, maxX), log='xy', ylim=c(minY, maxY), cex=1.5, lwd=1.5, col='###COL###', ylab = ylabT, xlab = xlabT, main='TITLE', pch=pchi, type='p', lty=3, bg='white', yaxt='n', xaxt='n', cex.main=2, cex.lab=1.5, mgp=c(3.5,0,0))
cexaxis <- 1
minv <- round((minY),digits=2)
sequ <- 10^((floor(log10(minY)):ceiling(log10(maxY))))
axis(2, at = format(sequ, digits=2,sci = T), cex.axis=cexaxis)
axis.at <- 10 ^ c(floor(log10(minY)):ceiling(log10(maxY)))
sq <- 1:10 * rep(axis.at[-1] / 10, each = 10)
axis(2, at = sq[-(1:3)] , tcl = -0.3, labels = FALSE, cex.axis=cexaxis)
sequ <- 10^((floor(log10(minX)):ceiling(log10(maxX))))
axis(1, at = format(sequ, digits=2,sci = TRUE), cex.axis=cexaxis)
axis.at <- 10 ^ c(floor(log10(minX)):ceiling(log10(maxX)))
axis(1, at = 1:10 * rep(axis.at[-1] / 10, each = 10),tcl = -0.3, labels = FALSE, cex.axis=cexaxis)
Now you need to execute the script
./myBash.sh
and open R
R
and run the main R file called TCK_T-single.R
source('TCK_T-single.R')
to generate a postscript file with the polt TCK_T-single.ps.
The file TCK_T-single.R looks like
postscript("TCK_T-single.ps")
par(mar=c(5,6,4,2))
par(mfrow=c(1,1))
dat0 <- read.table("../AUD_JPY-TCK_T-2002-12-01_00:00:00-2007-12-01_00:00:00.dat")
dat0$V1 <- dat0$V1*100
...
dat13 <- read.table("../USD_JPY-TCK_T-2002-12-01_00:00:00-2007-12-01_00:00:00.dat")
dat13$V1 <- dat13$V1*100
minX <- min(dat0$V1, dat1$V1, dat2$V1, dat3$V1, dat4$V1, dat5$V1, dat6$V1, dat7$V1, dat8$V1, dat9$V1, dat10$V1, dat11$V1, dat12$V1, dat13$V1)
maxX <- max(dat0$V1, dat1$V1, dat2$V1, dat3$V1, dat4$V1, dat5$V1, dat6$V1, dat7$V1, dat8$V1, dat9$V1, dat10$V1, dat11$V1, dat12$V1, dat13$V1)
minY <- min(dat0$V2, dat1$V2, dat2$V2, dat3$V2, dat4$V2, dat5$V2, dat6$V2, dat7$V2, dat8$V2, dat9$V2, dat10$V2, dat11$V2, dat12$V2, dat13$V2)
maxY <- max(dat0$V2, dat1$V2, dat2$V2, dat3$V2, dat4$V2, dat5$V2, dat6$V2, dat7$V2, dat8$V2, dat9$V2, dat10$V2, dat11$V2, dat12$V2, dat13$V2)
source("AUD_JPY.R")
par(new=TRUE)
...
source("USD_JPY.R")
par(new=TRUE)
par(lwd=1.5)
legend(minX, maxY, legend=(c('AUD_JPY','AUD_USD','CHF_JPY','EUR_AUD','EUR_CHF','EUR_GBP','EUR_JPY','EUR_USD','GBP_CHF','GBP_JPY','GBP_USD','GRW_GRW','USD_CHF','USD_JPY')) , col=c('cyan','gray45','green','blue','black','pink1','orange','yellow3','magenta','darkgreen','gray80','red','brown4','salmon3'), pch=c(16,21,17,24,18,23,15,22,12,25,10,8,9,11), lty=rep(0,10), cex=1.3, xjust=0, yjust=1)
dev.off()
and the R data files, e.g., AUD_JPY.R look like
### Data
dat <- read.table("../AUD_JPY-TCK_T-2002-12-01_00:00:00-2007-12-01_00:00:00.dat")
lab <- 'AUD_JPY'
xlabT <- 'XLAB'
ylabT <- 'YLAB'
### Estimate
# Approx. percentage
dat$V1 <- dat$V1*100
dat$V2 <- dat$V2*1
### Plot
#plot(slEmpRSx, minX, maxX, ylim=c(minY, maxY), log='xy', type = "l", col='cyan', ylab = ylabT, xlab = xlabT, cex.axis=1.5, lwd=1, main='TITLE', pch=16, cex=0.75)
#par(new=TRUE)
start <- 0
end <- 68
pchi <- c(rep(NA,start), 16, rep(NA,end));
plot(exp(X),exp(Y), xlim=c(minX, maxX), log='xy', ylim=c(minY, maxY), cex=1.5, lwd=2.5, col='cyan', ylab = ylabT, xlab = xlabT, main='TITLE', pch=pchi, type='l', lty=3, bg='white', yaxt='n', xaxt='n', cex.main=2, cex.lab=1.5, mgp=c(3.5,0,0))
par(new=TRUE)
plot(exp(X),exp(Y), xlim=c(minX, maxX), log='xy', ylim=c(minY, maxY), cex=1.5, lwd=1.5, col='cyan', ylab = ylabT, xlab = xlabT, main='TITLE', pch=pchi, type='p', lty=3, bg='white', yaxt='n', xaxt='n', cex.main=2, cex.lab=1.5, mgp=c(3.5,0,0))
cexaxis <- 1
minv <- round((minY),digits=2)
sequ <- 10^((floor(log10(minY)):ceiling(log10(maxY))))
axis(2, at = format(sequ, digits=2,sci = T), cex.axis=cexaxis)
axis.at <- 10 ^ c(floor(log10(minY)):ceiling(log10(maxY)))
sq <- 1:10 * rep(axis.at[-1] / 10, each = 10)
axis(2, at = sq[-(1:3)] , tcl = -0.3, labels = FALSE, cex.axis=cexaxis)
sequ <- 10^((floor(log10(minX)):ceiling(log10(maxX))))
axis(1, at = format(sequ, digits=2,sci = TRUE), cex.axis=cexaxis)
axis.at <- 10 ^ c(floor(log10(minX)):ceiling(log10(maxX)))
axis(1, at = 1:10 * rep(axis.at[-1] / 10, each = 10),tcl = -0.3, labels = FALSE, cex.axis=cexaxis)
Now you can put this all in a LaTeX file
ps2epsi TCK_T-single.ps
and use psfrag to replace the tags in the plot with LaTeX formatted stuff...
On and on...
Conditional TS Analysis
postscript("Cond-DC-EUR_USD-0.5.ps")
par(mar=c(5,5,4,2))
par(mfrow=c(2,4))
### Data
ccy <- read.table("EUR_USD-DX-OS-LB-2002-12-01_00:00:00-2007-12-01_00:00:00-0.500.obs")
### Calc 1
lab <- 'EUR_USD'
xlab <- '0.5% OS'
delta <- 1
ylab <- '1st following OS [%]'
lgth <- length(ccy[,1])-delta
xy <- matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
xy[i,1] <- ccy[i, 1]
xy[i,2] <- ccy[i+delta, 1]
}
plt <- xy[order(xy[,1]),]
plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')
### Calc 2
delta <- 2
ylab <- '2nd following OS [%]'
lgth <- length(ccy[,1])-delta
xy <- matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
xy[i,1] <- ccy[i, 1]
xy[i,2] <- ccy[i+delta, 1]
}
plt <- xy[order(xy[,1]),]
plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')
### Calc 3
delta <- 3
ylab <- '3rd following OS [%]'
lgth <- length(ccy[,1])-delta
xy <- matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
xy[i,1] <- ccy[i, 1]
xy[i,2] <- ccy[i+delta, 1]
}
plt <- xy[order(xy[,1]),]
plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')
### Calc 4
delta <- 4
ylab <- '4th following OS [%]'
lgth <- length(ccy[,1])-delta
xy <- matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
xy[i,1] <- ccy[i, 1]
xy[i,2] <- ccy[i+delta, 1]
}
plt <- xy[order(xy[,1]),]
plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')
### Calc 5
vol <- 4
ylab <- 'Sum 4 OS [%]'
lgth <- length(ccy[,1])-vol
xy <- matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
xy[i,1] <- ccy[i, 1]
for (j in 1:vol) {
xy[i,2] <- xy[i,2] + ccy[i+j, 1]
}
}
plt <- xy[order(xy[,1]),]
plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')
### Calc 6
vol <- 10
ylab <- 'Sum 10 OS [%]'
lgth <- length(ccy[,1])-vol
xy <- matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
xy[i,1] <- ccy[i, 1]
for (j in 1:vol) {
xy[i,2] <- xy[i,2] + ccy[i+j, 1]
}
}
plt <- xy[order(xy[,1]),]
plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')
### Calc 7
vol <- 20
ylab <- 'Sum 20 OS [%]'
lgth <- length(ccy[,1])-vol
xy <- matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
xy[i,1] <- ccy[i, 1]
for (j in 1:vol) {
xy[i,2] <- xy[i,2] + ccy[i+j, 1]
}
}
plt <- xy[order(xy[,1]),]
plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')
### Calc 8
vol <- 80
ylab <- 'Sum 80 OS [%]'
lgth <- length(ccy[,1])-vol
xy <- matrix(0, ncol=2, nrow=lgth)
for (i in 1:lgth) {
xy[i,1] <- ccy[i, 1]
for (j in 1:vol) {
xy[i,2] <- xy[i,2] + ccy[i+j, 1]
}
}
plt <- xy[order(xy[,1]),]
plot(plt, ylab = ylab, xlab = xlab, main = lab, cex.axis=2, cex.lab=2, cex.main=2.5, lwd=1, col='black')
dev.off()
Java, Bash and R
Thanks Ricardo.
Bash script executing a Java program that generates a Latex document containing figures generated by R...
#!/bin/bash
NAME=WeeklyExecutionReport
BASEDIR="/usr/local/test"
#DATE=`date +%Y%m%d.%H%M%S`
DATE=`date +%Y%m%d`
REPORT=$BASEDIR/reports/ExecutionReports/LatencyReport_${DATE}.pdf
cd "$BASEDIR/reports/WeeklyExecutionReports"
if [ -d tmp ]; then
rm -R tmp
fi
mkdir tmp
VARDIR=$BASEDIR/log/$NAME
LOGOUT=$VARDIR/$NAME.log,4,40000000
/usr/local/java/bin/java -classpath $BASEDIR/some.jar:$BASEDIR/lib/mysql/mysql-connector-java-3.1.6-bin.jar ch.url.path.to.main.WeeklyExecutionReport database table name pwd $BASEDIR/reports/WeeklyExecutionReports/tmp -l $LOGOUT -d ch.url.path.to.main
R --no-save --slave < $BASEDIR/init/WeeklyExecutionReport/times.R
R --no-save --slave < $BASEDIR/init/WeeklyExecutionReport/weeklyLat.R
R --no-save --slave < $BASEDIR/init/WeeklyExecutionReport/dailyLat.R
R --no-save --slave < $BASEDIR/init/WeeklyExecutionReport/pie.R
pdflatex $BASEDIR/init/WeeklyExecutionReport/report.tex
mv $BASEDIR/reports/WeeklyExecutionReports/report.pdf ${REPORT}
mpack -s "Execution Latency Report "${DATE} -d $BASEDIR/init/WeeklyExecutionReport/mail.dat ${REPORT} me@myemail.ch
rm $BASEDIR/reports/WeeklyExecutionReports/*.aux $BASEDIR/reports/WeeklyExecutionReports/*.log
rm -R tmp
where the Java program writes to a file, e.g.,
private void createDataFile(List<Times> list) throws RequestException, SQLException, IOException {
//"year,month,month_day, week_day,hour,minute,second"
Calendar cal = sdf.getCalendar();
String fullFileName = dir + "/" + "times.dat";
Writer timesFile = new BufferedWriter(new FileWriter(new File(fullFileName), false));
for (Times times : list) {
cal.setTime(times.getCreation());
String line =
times.getWait()+times.getExecution()+" "+
times.getExecution()+" "+
times.getWait()+" "+
cal.get(Calendar.YEAR)+" "+
(cal.get(Calendar.MONTH)+1)+" "+
cal.get(Calendar.DAY_OF_MONTH)+" "+
cal.get(Calendar.DAY_OF_WEEK)+" "+
cal.get(Calendar.HOUR_OF_DAY)+" "+
cal.get(Calendar.MINUTE)+" "+
cal.get(Calendar.SECOND);
timesFile.write(line);
timesFile.write(System.getProperty("line.separator"));
}
timesFile.close();
}
Examples of the R files can be seen in the last section.
Nice Figures
2D Plot With Two Y-Axis and X-Axis Labeled by Dates
library(fCalendar)
### Variables
myData <- 'EUR_CHF_2002-01-01-00:00:00_2008-11-02-00:00:00_6MONTH_0.25.r'
instr <- substr(myData,1,7)
from <- substr(myData,9,18)
to <- substr(myData,29,38)
inc <- substr(myData,56,59)
hor <- substr(myData,49,54)
main <- paste(instr, ' ', hor, ' ', inc)
### Load data
load(myData)
lgth <- length(inData)
X <- rep(0.0, lgth)
Y <- rep(0.0, lgth)
Z <- rep(0.0, lgth)
T <- seq(as.Date('1970-01-01'), by='day', length=lgth)
for (i in 1:lgth) {
X[i] <- i;
Y[i] <- inData[[i]]$value$B;
Z[i] <- inData[[i]]$value$C;
T[i] <- timeDate(inData[[i]]$beanEnd)
}
### Plot
par(mar=c(4.5,4.5,3,5))
#par(mfrow=c(1,1))
#plot(X, Y, xlim=c(min(X), max(X)), ylim=c(min(Y), max(Y)), yaxt='n', xlab = 't', ylab = 'C', main='', type='p', pch=20)
plot(T, Y, ylim=c(min(Y), max(Y)), xaxt='n', xlab = 't (end date)', ylab = 'E', main='', type='p', pch=20)
# x-axis
#sequ <- T[seq(1, by=20, length=lgth/20)]
firstend <- seq(as.Date(from), by='6 month', length=2)[2]
dt <- difftime(to, from, units='days')
yrs <- as.double(dt)/365
iYrs <- as.integer(yrs)
if(yrs >= 3) {
sequ <- seq(as.Date(from),as.Date(to), by='year')
} else if (yrs >= 1) {
sequ <- seq(as.Date(from),as.Date(to), by='3 month')
} else if (yrs >= 0.7) {
sequ <- seq(as.Date(from),as.Date(to), by='2 month')
} else if (yrs >= 0.2) {
sequ <- seq(as.Date(from),as.Date(to), by='2 day')
} else {
sequ <- seq(as.Date(from),as.Date(to), by='1 day')
}
#sequ <- seq(as.Date(from), by='year', length=iYrs)
myX <- c(firstend, as.Date(to))
trsf <- myX[myX > T[1]]
axis.Date(1, at = trsf, labels = format(trsf, format='%m-%d'))
real <- sequ[sequ > T[1]]
axis.Date(1, at = real, labels = format(real, format='%m-%d'))
axis.Date(1, at = real, col='black', padj=1.5)
# Comment: note problems with axis.Date
par(new=t)
plot(X, Y, xlim=c(min(X), max(X)), ylim=c(min(Y), max(Y)), xaxt='n', yaxt='n', xaxt='n', xlab = '', ylab = '', main=main, type='l')
par(new=t)
plot(X, Z, xlim=c(min(X), max(X)), ylim=c(min(Z), max(Z)), xaxt='n', yaxt='n', xlab = '', ylab = '', main='', pch='*')
par(new=t)
plot(X, Z, xlim=c(min(X), max(X)), ylim=c(min(Z), max(Z)), xaxt='n', yaxt='n', xaxt='n', xlab = '', ylab = '', main='', type='l', lty=3)
axis(4)
mtext('C', side=4, pad=4)
3D Scatterplot
Library on cran.r-project.org.
Needs version 2.7.0 or later. See [1] or R for updating.
For each t (y-axis) value, the data points are plotted in a for loop. Couldn't figure out how to rotate in the direction of the z-axis.
library(scatterplot3d)
myData <- '/home/jbg/work/svn/j4/EUR_CHF_2002-01-01-00:00:00_2002-08-02-00:00:00_6MONTH_0.50.r'
load(myData)
instr <- substr(myData,1,7)
from <- substr(myData,9,18)
inc <- substr(myData,56,59)
hor <- substr(myData,49,54)
main <- paste(instr, ' ', hor, ' ', inc)
size <- length(inData)
xmax <- -1e10
xmin <- 1e10
ymax <- -1e10
ymin <- 1e10
for (j in 1:size) {
xmax <- max(xmax, max(inData[[j]]$value$X))
ymax <- max(ymax, max(inData[[j]]$value$Y))
xmin <- min(xmin, min(inData[[j]]$value$X))
ymin <- min(ymin, min(inData[[j]]$value$Y))
}
for (j in 1:size) {
lgth <- length(inData[[j]]$value$X)
X <- inData[[j]]$value$X
Y <- inData[[j]]$value$Y
ind <- which(Y==0)
if(length(ind) > 0) {
X <- X[-ind];
Y <- Y[-ind];
}
X <- log( X )
Y <- log( Y )
# Linear model
res <- lm(Y~X)
# Estimate parameters
A <- summary(res)$coefficients[1,1]
dA <- summary(res)$coefficients[1,2]
B <- summary(res)$coefficients[2,1]
dB <- summary(res)$coefficients[2,2]
C <- exp(-A/B)
C.err <- sqrt((-C/B*dA)^2+(A/B^2*C*dB)^2)
# Define function
slEmpRSx <- function(t) (t/C)^B
# Plot
plo <- scatterplot3d(X,rep(j, lgth), Y, xlim=log(c(xmin,xmax)), zlim=log(c(ymin,ymax)), ylim=c(1,size), pch='.', box = FALSE, angle = 45, scale.y=1, grid=F, main=main, xlab='DX', zlab='#DC', ylab='t')
# Regression line
y <- slEmpRSx(exp(X))
#plo$points3d(X,rep(j, lgth), log(y), type='l')
par(new=TRUE)
}
3D Contour Plot
Using the empirical data from the scatterplot above to estimate scaling laws.
myData <- 'EUR_CHF_2002-01-01-00:00:00_2008-11-02-00:00:00_6MONTH_0.25.r'
load(myData)
instr <- substr(myData,1,7)
from <- substr(myData,9,18)
inc <- substr(myData,56,59)
hor <- substr(myData,49,54)
main <- paste(instr, ' ', hor, ' ', inc)
size <- length(inData)
lgth <- length(inData[[1]]$value$X)
xmax <- -1e10
xmin <- 1e10
ymax <- -1e10
ymin <- 1e10
newX <- seq(1,size,by=3)
mat <- matrix(0, nrow=lgth, ncol=length(newX))
#for (j in 1:size) {
for(j in newX) {
xmax <- max(xmax, max(inData[[j]]$value$X))
ymax <- max(ymax, max(inData[[j]]$value$Y))
xmin <- min(xmin, min(inData[[j]]$value$X))
ymin <- min(ymin, min(inData[[j]]$value$Y))
}
myInd <- 1
for (j in newX) {
X <- inData[[j]]$value$X
Y <- inData[[j]]$value$Y
ind <- which(Y==0)
if(length(ind) > 0) {
X <- X[-ind];
Y <- Y[-ind];
}
lX <- log( X )
lY <- log( Y )
# Linear model
res <- lm(lY~lX)
# Estimate parameters
A <- summary(res)$coefficients[1,1]
dA <- summary(res)$coefficients[1,2]
B <- summary(res)$coefficients[2,1]
dB <- summary(res)$coefficients[2,2]
C <- exp(-A/B)
C.err <- sqrt((-C/B*dA)^2+(A/B^2*C*dB)^2)
# Define function
slEmpRSxLog <- function(t) (t/C)^B
slEmpRSx <- function(t) A+B*t
#plot(X,Y, log='xy', xlim=c(min(X), max(X)), ylim=c(min(Y), max(Y))) # Real axis vals
#plot(lX,lY, xlim=c(min(lX), max(lX)), ylim=c(min(lY), max(lY))) # Log axis vals
## Real axis vals
#plot(slEmpRSxLog, log='xy', xlim=c(min(X), max(X)), ylim=c(min(Y), max(Y)))
#plot(slEmpRSxLog(X), log='xy')
## Log axis vals
#plot(slEmpRSx, xlim=c(min(lX), max(lX)), ylim=c(min(lY), max(lY)))
#plot(slEmpRSx(X))
#plot(slEmpRSx(lX), log='x')
# Save data
#sequ <- seq(min(X), max(X), by= (max(X) - min(X)) / lgth)
sequ <- seq(min(lX), max(lX), by= (max(lX) - min(lX)) / lgth)
sequ <- sequ[1:lgth]
#mat[,j] <- slEmpRSxLog(sequ)
mat[,myInd] <- slEmpRSx(sequ)
myInd <- myInd + 1
}
persp(sequ, seq(1,length(newX)), mat, phi=25, theta=55, xlab='DX', zlab='#DC', ylab='t', ticktype='detailed', main=main, r=10, shade=0.1, lphi=0, ltheta = -135)
Histogram with Inset
Thanks Alex...
library(lattice)
graphics.off()
times <- read.table("times.dat")
pdf("latency.pdf", onefile=F)
qs <- quantile(times[,1],probs=seq(0,1,0.01));
s <- subset(times[,1],times[,1]>qs[96]);
plot.new()
h1 <- histogram(times[,1],xlab="latency [ms]",ylab="occurences",main="",col="salmon",type="count")
h2 <- histogram(s,xlab="latency [ms]",ylab="occurences",main="> 95th-centile",col="salmon",type="count")
print(h1,more=T)
print(h2,position = c(.4,.2,.9,.65))
m <- round(mean(times[,1])*100)/100
s <- round(sd(times[,1])*100)/100
text(0.15,0.98,paste("mean = ",m," / std = ",s),pos=4)
q90 <- round(qs[91]*10)/10;
q95 <- round(qs[96]*10)/10;
q99 <- round(qs[100]*10)/10;
text(0.15,0.90,paste("perc.: 90% <",q90,", 95% <",q95,", 99% <",q99),pos=4)
dev.off()
Bar Chart
Thanks Alex...
library(lattice)
graphics.off()
pdf("pie.pdf", onefile=F)
times <- read.table("times.dat")
lat = times[,1];
exe = times[,2];
wai = times[,3];
res=sort(lat,index.return=T);
lat=res$x
exe[res$ix]=exe;
wai[res$ix]=wai;
n=length(lat);
nbSlices=20;
ds=n/nbSlices;
avl=seq(0,0,length=nbSlices)
ave=seq(0,0,length=nbSlices)
avw=seq(0,0,length=nbSlices)
for (i in seq(1,nbSlices)) {
i0=floor(1+(i-1)*ds);
i1=floor(i*ds);
if (i == nbSlices) i1=n;
avl[i]=mean(lat[i0:i1]);
ave[i]=mean(exe[i0:i1]);
avw[i]=mean(wai[i0:i1]);
}
y0=0;
y1=max(avl)
x=seq(1,nbSlices)*5-2.5
plot(x,avl,type="l",xlab="",ylab="",xlim=c(-2.5,97.5),ylim=c(y0,y1),lwd=2)
barplot(avl/avl*y1,width=5,add=T,space=0,col="salmon",axisnames=F)
barplot(ave/avl*y1,width=5,add=T,space=0,col="skyblue",axisnames=F)
par(new=T)
plot(x,avl,type="b",xlab="trades ascendingly sorted by latency time [%]",ylab="latency [ms]",xlim=c(-2.5,97.5),ylim=c(y0,y1),lwd=2)
dy=(y1-y0)/4;
axis(4,c(y0,y0+dy,y0+3*dy,y1),c(0,25,75,100))
mtext("times decomposition [%]",side=4)
legend(x=mean(x),y=y1/2,c("execution/latency","waiting/latency"),fill=c("skyblue","salmon"),bg="white",xjust=0.5,yjust=0.5)
dev.off()
2D Plot with Error Bars
Daily seasonality. Thanks Alex...
library(lattice)
graphics.off()
pdf("dailyLat.pdf", onefile=F)
times <- read.table("times.dat")
lat = times[,1];
hour = times[,8];
sx = seq(0,0,length=24)
sx2 = seq(0,0,length=24)
nh = seq(0,0,length=24)
n=length(lat)
for (i in seq(1,n)) {
sx[hour[i]+1] = sx[hour[i]+1]+lat[i];
sx2[hour[i]+1] = sx2[hour[i]+1]+(1.0*lat[i]*lat[i]);
nh[hour[i]+1] = nh[hour[i]+1] + 1;
}
av=sx/pmax(nh,1);
std=sqrt(sx2/pmax(nh,1)-av*av);
y0=min(av-std/2);
y1=max(av+std/2);
x <- seq(0,23)
plot(x,av,type="b",ylim=c(y0,y1),xlab="hours",ylab="latency [ms]",main="Daily seasonality")
arrows(x,av-std/2,x,av+std/2,length=.05,angle=90,code=3,col="gray")
dev.off()
Histogram with Daily Bins
Weekly seasonality. Thanks Alex...
library(lattice)
graphics.off()
pdf("weeklyLat.pdf", onefile=F)
times <- read.table("times.dat")
lat = times[,1];
wday = times[,7];
hour = times[,8];
sx = seq(0,0,length=168)
sx2 = seq(0,0,length=168)
nh = seq(0,0,length=168)
n=length(lat)
for (i in seq(1,n)) {
j=(wday[i]-1)*24+hour[i]+1;
sx[j] = sx[j]+lat[i];
sx2[j] = sx2[j]+(1.0*lat[i]*lat[i]);
nh[j] = nh[j] + 1;
}
av=sx/pmax(nh,1);
std=sqrt(sx2/pmax(nh,1)-av*av);
y0=min(av-std/2);
y1=max(av+std/2);
x <- seq(0,167)
plot(x,av,type="l",ylim=c(y0,y1),xlab="hours",ylab="latency [ms]",main="Weekly seasonality")
#arrows(x,av-std/2,x,av+std/2,length=.05,angle=90,code=3,col="gray")
# Infos about the days
lines(c(0,0),c(y0,y1),col="gray")
text(12,y1,"Sun",col="gray",pos=1)
lines(c(24,24),c(y0,y1),col="gray")
text(36,y1,"Mon",col="gray",pos=1)
lines(c(48,48),c(y0,y1),col="gray")
text(60,y1,"Tue",col="gray",pos=1)
lines(c(72,72),c(y0,y1),col="gray")
text(84,y1,"Wed",col="gray",pos=1)
lines(c(96,96),c(y0,y1),col="gray")
text(108,y1,"Thu",col="gray",pos=1)
lines(c(120,120),c(y0,y1),col="gray")
text(132,y1,"Fri",col="gray",pos=1)
lines(c(144,144),c(y0,y1),col="gray")
text(156,y1,"Sat",col="gray",pos=1)
lines(c(168,168),c(y0,y1),col="gray")
dev.off()
The times.dat file looks like
135 135 0 2008 10 9 5 11 47 25 135 135 0 2008 10 10 6 21 55 27 ...
Script Examples
Scaling Law Estimation
See also above...
File my.dat:
0.01 7947915.0 0.1 100000.0 1.0 280.0 2.0 220.0 5.0 25.0
R script:
### Data
dat <- read.table("my.dat")
lab <- 'LABEL'
xlabT <- 'XLAB'
ylabT <- 'YLAB'
### Estimate
dat$V1 <- dat$V1
dat$V2 <- dat$V2
ldat <- log(dat)
Min <- min(ldat$V1)
Max <- max(ldat$V1)
X <- ldat$V1[ldat$V1 >= Min & ldat$V1 <= Max]
Y <- ldat$V2[ldat$V1 >= Min & ldat$V1 <= Max]
res <- lm(Y~X)
# Error propagation
A <- summary(res)$coefficients[1,1]
dA <- summary(res)$coefficients[1,2]
B <- summary(res)$coefficients[2,1]
dB <- summary(res)$coefficients[2,2]
C <- exp(-A/B)
C.err <- sqrt((-C/B*dA)^2+(A/B^2*C*dB)^2)
slEmpRSx <- function(t) (t/C)^B
cat('Exp:', B)
cat(' +- ', dB)
cat('\nCst:', C)
cat(' +- ', C.err)
cat('\n')
### Plot
plot(exp(X),exp(Y), xlim=c(min(exp(X)), max(exp(X))), log='xy', ylim=c(min(exp(Y)), max(exp(Y))), cex=1.5, lwd=2.5, col='blue', ylab = ylabT, xlab = xlabT, main='TITLE', type='l', lty=3, bg='white', yaxt='n', xaxt='n', cex.main=2, cex.lab=1.5)
par(new=TRUE)
plot(exp(X),exp(Y), xlim=c(min(exp(X)), max(exp(X))), log='xy', ylim=c(min(exp(Y)), max(exp(Y))), cex=1.5, lwd=1.5, col='blue', ylab = ylabT, xlab = xlabT, main='TITLE', type='p', lty=0, bg='white', yaxt='n', xaxt='n', cex.main=2, cex.lab=1.5)
par(new=TRUE)
plot(exp(X),slEmpRSx(exp(X)), log='xy', type='l', col='red', ylab = ylabT, xlab = xlabT)
Problems
Labelling Axis with Dates using axis.Date
If you get
Error in axis(side, at = z, labels = labels, ...) : 'at' and 'labels' lengths differ, 2 != 3
for
axis.Date(1, at = Z, labels = format(Z, format='%m-%d'))
where Z and format(Z, format='%m-%d') have the same length, then the problem is
one of the values of Z is outside of the current range of the plot.
