GoogleSearch
이 블로그 검색
R: my cookbook
라벨:
Informatics
이메일로 전송BlogThis!X에 공유Facebook에서 공유
{My R Cookbook}
ToC
- transform() column class/mode
- reshape table
- add a new column with a variable name
- flexible R ps/pdf font handling
- Match
- Direct data input
- Sum() by a reference column
- Not consistent column numbers in table?
- String as variable name
- Memory-efficient coding
- Data-type issue
- split strings in a vector and convert it to a data.frame
- Installation
- read table with variable column numbers
# transform() column class/mode
transform(d, fake_char = as.numeric(fake_char),
char_fac = as.numeric(char_fac))
# Reshape table
http://stackoverflow.com/questions/9617348/reshape-three-column-data-frame-to-matrix
[FROM]
x a 1
x b 2
x c 3
y a 3
y b 3
y c 2
[TO]
a b c
x 1 2 3
y 3 3 2
[HOW TO]?
tmp <- data.frame(x=gl(2,3, labels=letters[24:25]),
y=gl(3,1,6, labels=letters[1:3]),
z=c(1,2,3,3,3,2))
[Using reshape2:]
library(reshape2)
acast(tmp, x~y, value.var="z")
[Using matrix indexing:]
with(tmp, {
out <- matrix(nrow=nlevels(x), ncol=nlevels(y),
dimnames=list(levels(x), levels(y)))
out[cbind(x, y)] <- z
out
})
[Using xtabs:]
xtabs(z~x+y, data=tmp)
[Using reshape()]
> reshape(tmp, idvar="x", timevar="y", direction="wide")
x z.a z.b z.c
1 x 1 2 3
4 y 3 3 2
# Add a new column with a variable name
x = cbind( a=1:2, b=3:4)
x = data.frame(x) # should be a data frame
new = 'c'
x[[ new]] = 5:6 # this works only with data frame
x
# Flexible R ps/pdf fonts handling
http://blog.revolutionanalytics.com/2012/09/how-to-use-your-favorite-fonts-in-r-charts.html
library(extrafont)
library(extrafont)
font_import()
fonts()
names(pdfFonts())
# Match
which( 20 == c(11,22,20))
[1] 3
c(11,20,30) %in% c(10,33,20)
[1] FALSE TRUE FALSE
match( c(10,20,30), c(11,33,20) )
[1] NA 3 NA
# Direct data input
dat <- read.table(header = TRUE, text = "SNP Geno Allele
marker1 G1 AA
marker2 G1 TT
marker3 G1 TT
marker1 G2 CC
marker2 G2 AA
marker3 G2 TT
marker1 G3 GG
marker2 G3 AA
marker3 G3 TT")
# Sum column values by a reference column
tapply( t$vak, t$ref, FUN=sum) # only one column
or
# for multiple columns
aggregate( t[,3:10], by=list(symbol=t$symbol), FUN=sum)
# Version check
str(.Platform)
version
# Inconsistent number of columns in a table?
gmt = "c2.all.v4.0.symbols.gmt"
gmt = "c2-kegg-biocarta-reactome.gmt"
fieldCount = count.fields(gmt, sep = "\t")
msigdb = read.csv( file=gmt, sep="\t", header=F, stringsAsFactor=F, fill=T, col.names=1:max(fieldCount))
## for fast computation, use matrix
msigdb = as.matrix( msigdb )
# Check classes of columns | reading table faster
x = read.table( file="xxx", nrows=10000)
sapply( x, class )
read.table( file="xxx", colClasses = sapply(x, class) )
# String as variable name
assign("x", 5)
i=3
assign( paste("a.", i, sep=""), 6 )
get( paste("a.",i,sep="") )
# Matrix
> x = matrix(1:6,2,3)
> x
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
> x[[3]]
[1] 3
> x[1,2,drop=F]
[,1]
[1,] 3
> x[1, , drop=F]
[,1] [,2] [,3]
[1,] 1 3 5
# List
x = list( foo=1:4, bar=0.6 )
x[1]
x[1]
$foo
[1] 1 2 3 4
x[[1]]
[1] 1 2 3 4
> x$bar
[1] 0.6
> x[["bar"]]
[1] 0.6
> x["bar"]
$bar
[1] 0.6
> x= list( aadsffs = 1:5, afdfsaf=1:6)
> x$aa
[1] 1 2 3 4 5
> x[["aa", exact=F]]
[1] 1 2 3 4 5
> complete.cases( vector1, vector2 ) # remove NA
# Memory-efficient
Source: http://www.r-bloggers.com/faster-for-loops-in-r/
slow!
x = c()
system.time(
for(i in 1:40000){
x = c(x,i) #here i is combined with previous contents of x
}
)
user system elapsed
1.54 0.00 1.53
Much faster trick!
x = rep( NA, 50000 )
x = NULL
system.time(
for(i in 1:40000){
x[i] = i #changing value of particular element of x
}
)
user system elapsed
0.57 0.00 0.56
EVEN faster trick!
x = rep( NA, 50000 )
x = rep(NA, 1000000)
system.time(
for(i in 1:40000){
x[i] = i #changing value of particular element of x
}
)
x[!is.na(x)]
user system elapsed
0.06 0.00 0.06
Warning: Data types! as.numeric()
http://stackoverflow.com/questions/2288485/how-to-convert-a-data-frame-column-to-numeric-type
d = data.frame(char = letters[1:5],
fake_char = as.character(1:5),
fac = factor(1:5),
char_fac = factor(letters[1:5]),
num = 1:5, stringsAsFactors = FALSE)
sapply(d, mode)
sapply(d, class)
transform(d, fake_char = as.numeric(fake_char),
char_fac = as.numeric(char_fac))
split strings in a vector and convert it to a data.frame
string = c("xy_100_ab", "xy_101_ab","xy_102_ab","xy_103_ab")
out = data.matrix( do.call( rbind, strsplit( string, '_' ) ) )
## if you use dataframe, your number will be factor, not character.
colnames(out) = paste('column',1:3,sep="")
Concatenate vectors
paste( v, collapse="" )
Get all pkgs list and re-install
- In old R
- IP = as.data.frame(installed.packages())
- MyPkgs <- subset(IP, !Priority %in% c("base", "recommended"), select = c(Package, Bundle))
- Then in a new version R,
- install.packages(MyPkgs$Package, dependencies = TRUE)
#--run in the old version of R
setwd("C:/Temp/")
packages <- installed.packages()[,"Package"]
save(packages, file="Rpackages")
INSTALL NEW R VERSION
#--run in the new version
setwd("C:/Temp/")
load("Rpackages")
for (p in setdiff(packages, installed.packages()[,"Package"]))
install.packages(p)
Installation Error
- custom build: ./configure --enable-R-shlib
- "configure: error: --with-readline=yes (default) and headers/libs are not available".
- SOLUTION: yum install readline-devel
Basic
- How to delete a previously saved workspace so that it is no longer restored at startup?
- unlink(".RData")
Text processing
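- Why does read.table() stop reading table while read.delim() works just fine?
- read.table() defaults to quote = "\"'" and comment.char = "#", so an apostrophe or a '#' inside a field makes it swallow or drop the rest of the input; read.delim() defaults to quote = "\"" and comment.char = "". Pass quote = "" and comment.char = "" to read.table() to avoid this.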
How to get only the first item of each list element (do not call "unlist" on the list itself; extract with lapply first):
a = c(1:10)
b = LETTERS[seq( from = 1, to = 10 )]
x = list( a, b ) # or x = list( "a"=a, "b"=b )
names( x ) = c( "a", "b" )
## unlist lapply(x, FUN="[", 1);
lapply(x, FUN="[", 1);
unlist( lapply(x, FUN="[", 1) , use.names=F)
sapply(x, FUN=length);
http://www.mayin.org/ajayshah/KB/R/index.html
Read a table w/ variable column number
fieldCount = count.fields("table", sep = "\t")
read.csv( file="table", sep="\t", header=F, stringsAsFactor=F, fill=T, col.names=1:max(fieldCount))
R by example
Basics
Reading files
Graphs
Probability and statistics
Regression
Time-series analysis
ARMA estimation, diagnostics, forecasting
이메일로 전송BlogThis!X에 공유Facebook에서 공유
라벨:
Informatics
ToC
- transform() column class/mode
- reshape table
- add a new column with a variable name
- flexible R ps/pdf font handling
- Match
- Direct data input
- Sum() by a reference column
- Not consistent column numbers in table?
- String as variable name
- Memory-efficient coding
- Data-type issue
- split strings in a vector and convert it to a data.frame
- Installation
- read table with variable column numbers
transform(d, fake_char = as.numeric(fake_char),
char_fac = as.numeric(char_fac))
x a 1
x b 2
x c 3
y a 3
y b 3
y c 2
a b c
x 1 2 3
y 3 3 2
tmp <- data.frame(x=gl(2,3, labels=letters[24:25]),
y=gl(3,1,6, labels=letters[1:3]),
z=c(1,2,3,3,3,2))
library(reshape2)
acast(tmp, x~y, value.var="z")
with(tmp, {
out <- matrix(nrow=nlevels(x), ncol=nlevels(y),
dimnames=list(levels(x), levels(y)))
out[cbind(x, y)] <- z
out
})
xtabs(z~x+y, data=tmp)
> reshape(tmp, idvar="x", timevar="y", direction="wide")
x z.a z.b z.c
1 x 1 2 3
4 y 3 3 2
library(extrafont)
font_import()
fonts()
names(pdfFonts())
dat <- read.table(header = TRUE, text = "SNP Geno Allele
marker1 G1 AA
marker2 G1 TT
marker3 G1 TT
marker1 G2 CC
marker2 G2 AA
marker3 G2 TT
marker1 G3 GG
marker2 G3 AA
marker3 G3 TT")
> x[1, , drop=F]
[,1] [,2] [,3]
[1,] 1 3 5
x[1]
$foo
[1] 1 2 3 4
[1] 1 2 3 4
> x$bar
[1] 0.6
> x[["bar"]]
[1] 0.6
> x["bar"]
$bar
[1] 0.6
> x= list( aadsffs = 1:5, afdfsaf=1:6)
> x$aa
[1] 1 2 3 4 5
> x[["aa", exact=F]]
[1] 1 2 3 4 5
> complete.cases( vector1, vector2 ) # remove NA
# Memory-efficient
Source: http://www.r-bloggers.com/faster-for-loops-in-r/
slow!
x = c()
system.time(
for(i in 1:40000){
x = c(x,i) #here i is combined with previous contents of x
}
)
user system elapsed
1.54 0.00 1.53
Much faster trick!
x = rep( NA, 50000 )
x = NULL
system.time(
for(i in 1:40000){
x[i] = i #changing value of particular element of x
}
)
user system elapsed
0.57 0.00 0.56
EVEN faster trick!
x = rep( NA, 50000 )
x = rep(NA, 1000000)
system.time(
for(i in 1:40000){
x[i] = i #changing value of particular element of x
}
)
x[!is.na(x)]
user system elapsed
0.06 0.00 0.06
Warning: Data types! as.numeric()
http://stackoverflow.com/questions/2288485/how-to-convert-a-data-frame-column-to-numeric-type
d = data.frame(char = letters[1:5],
fake_char = as.character(1:5),
fac = factor(1:5),
char_fac = factor(letters[1:5]),
num = 1:5, stringsAsFactors = FALSE)
sapply(d, mode)
sapply(d, class)
transform(d, fake_char = as.numeric(fake_char),
char_fac = as.numeric(char_fac))
split strings in a vector and convert it to a data.frame
string = c("xy_100_ab", "xy_101_ab","xy_102_ab","xy_103_ab")
out = data.matrix( do.call( rbind, strsplit( string, '_' ) ) )
## if you use dataframe, your number will be factor, not character.
colnames(out) = paste('column',1:3,sep="")
Concatenate vectors
paste( v, collapse="" )
Get all pkgs list and re-install
- In old R
- IP = as.data.frame(installed.packages())
- MyPkgs <- subset(IP, !Priority %in% c("base", "recommended"), select = c(Package, Bundle))
- Then in a new version R,
- install.packages(MyPkgs$Package, dependencies = TRUE)
#--run in the old version of R
setwd("C:/Temp/")
packages <- installed.packages()[,"Package"]
save(packages, file="Rpackages")
INSTALL NEW R VERSION
#--run in the new version
setwd("C:/Temp/")
load("Rpackages")
for (p in setdiff(packages, installed.packages()[,"Package"]))
install.packages(p)
Installation Error
- custom build: ./configure --enable-R-shlib
- "configure: error: --with-readline=yes (default) and headers/libs are not available".
- SOLUTION: yum install readline-devel
- How to delete a previously saved workspace so that it is no longer restored at startup?
- unlink(".RData")
- Why does read.table() stop reading table while read.delim() works just fine?
How to get the only first item from a list: do not use "unlist" problem with
a = c(1:10)
b = LETTERS[seq( from = 1, to = 10 )]
x = list( a, b ) # or x = list( "a"=a, "b"=b )
names( x ) = c( "a", "b" )
## unlist lapply(x, FUN="[", 1);
lapply(x, FUN="[", 1);
unlist( lapply(x, FUN="[", 1) , use.names=F)
sapply(x, FUN=length);
http://www.mayin.org/ajayshah/KB/R/index.html
Read a table w/ variable column number
fieldCount = count.fields("table", sep = "\t")
read.csv( file="table", sep="\t", header=F, stringsAsFactor=F, fill=T, col.names=1:max(fieldCount))
R by example
Basics
Reading files
Graphs
Probability and statistics
Regression
Time-series analysis
ARMA estimation, diagnostics, forecasting |
Scientist. Husband. Daddy. --- TOLLE. LEGE
외부자료의 인용에 있어 대한민국 저작권법(28조)과 U.S. Copyright Act (17 USC. §107)에 정의된 "저작권물의 공정한 이용원칙 | the U.S. fair use doctrine" 을 따릅니다. 저작권(© 최광민)이 명시된 모든 글과 번역문들에 대해 (1) 복제-배포, (2) 임의수정 및 자의적 본문 발췌, (3) 무단배포를 위한 화면캡처를 금하며, (4) 인용 시 URL 주소 만을 사용할 수 있습니다. [후원 | 운영] [대문으로] [방명록] [옛 방명록] [티스토리 (백업)] [신시내티]
-