GoogleSearch
이 블로그 검색
[TIP] biomaRt
라벨:
Informatics
이메일로 전송BlogThis!X에 공유Facebook에서 공유
http://biomart2010.wikispaces.com/
Installing biomaRt
-
source( "http://www.bioconductor.org/biocLite.R")
biocLite(’biomaRt’)
Check Available BioMarts
-
library(biomaRt)
listMarts()
Select the Ensembl BioMart
-
ensembl = useMart("ensembl")
datasets = listDatasets(ensembl)
ensembl = useMart("ensembl",dataset="hsapiens_gene_ensembl")
attributes=listAttributes(ensembl)
filters=listFilters(ensembl)
Task 1a
-
affyids = c("211550_at","202431_s_at","206044_s_at")
annotation = getBM(c("affy_hg_u133_plus_2","ensembl_gene_id","hgnc_symbol","chromosome_name","start_position","end_position","band","strand"), filters="affy_hg_u133_plus_2", values=affyids,mart = ensembl)
print(annotation)
Task 1aa
library(biomaRt)
mart = useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
refseq = c("NM_006945", "NM_152486", "NM_198317", "XM_001125684" )
#getBM(filters=c( "refseq_mrna", "refseq_mrna_predicted"), attributes="external_gene_id", values=refseq, mart=mart)
getBM(filters="refseq_mrna", attributes=c("refseq_mrna", "hgnc_symbol"), values=refseq, mart=mart)
listAttributes( mart)[grep( "refseq", listAttributes( mart)[,1]), ]
# coding
NM = getBM(filters="refseq_mrna", attributes=c("refseq_mrna", "hgnc_symbol"), values=refseq, mart=mart)
# coding - predict
XM = getBM(filters="refseq_mrna_predicted", attributes=c("refseq_mrna_predicted", "hgnc_symbol"), values=refseq, mart=mart )
# non-coding
NR = getBM(filters="refseq_ncrna" , attributes=c("refseq_ncrna", "hgnc_symbol"), values=refseq, mart=mart )
# non-coding-predicted
XR = getBM(filters="refseq_ncrna_predicted", attributes=c("refseq_ncrna_predicted", "hgnc_symbol"), values=refseq, mart=mart )
colnames( NM) = c("refseq", "hgnc")
colnames( XM) = c("refseq", "hgnc")
colnames( NR) = c("refseq", "hgnc")
colnames( XR) = c("refseq", "hgnc")
refseq2hgnc = rbind( NM, XM, NR, XR )
Task 1b
-
illuminaIDs = c("ILMN_1728071","ILMN_1662668")
goAnnot = getBM(c("illumina_humanwg_6_v2", "go_biological_process_id","go_biological_process_linkage_type"), filters="illumina_humanwg_6_v2", values=illuminaIDs, mart = ensembl)
print(goAnnot[1:5,])
Task 2
-
diab=getBM(c("ensembl_gene_id","hgnc_symbol"),filters=c("mim_morbid_accession","go"), values=list(c("125853","222100"),"GO:0003700"),mart=ensembl)
print(diab)
Task3
-
miRNA = getBM(c("mirbase_id","ensembl_gene_id","start_position","chromosome_name"), filters=c("chromosome_name","with_mirbase"), values=list(13,TRUE), mart=ensembl)
miRNA[1:5,]
Task4
-
filterOptions("snptype_filters",ensembl)
entrez = getBM("entrezgene",filters=c("chromosome_name","snptype_filters"), values=list(22,"NON_SYNONYMOUS_CODING"),mart=ensembl)
entrez[1:5,]
Task5
-
seq = getSequence(id="CDH1", type="hgnc_symbol",seqType="gene_exon", mart = ensembl)
seq[1,]
Task6
-
promoter=getSequence(id=c("APC","CUL1"), type="hgnc_symbol",seqType="coding_gene_flank",upstream =2000, mart=ensembl)
Task7
-
human=useMart("ensembl", dataset="hsapiens_gene_ensembl")
chicken=useMart("ensembl", dataset="ggallus_gene_ensembl")
mapping = getLDS(attributes=c("affy_hg_u95av2","hgnc_symbol"), filters="affy_hg_u95av2", values=c("1888_s_at","1434_at"),mart=human,attributesL="affy_chicken", martL=chicken)
Accessing Archived Ensembl versions
-
listMarts(host="may2009.archive.ensembl.org/biomart/martservice/")
ensembl54=useMart("ENSEMBL_MART_ENSEMBL", host="may2009.archive.ensembl.org/biomart/martservice/")
listDatasets(ensembl54)
ensembl54=useMart("ENSEMBL_MART_ENSEMBL", host="may2009.archive.ensembl.org/biomart/martservice/", dataset='hsapiens_gene_ensembl')
Task8
-
snp=useMart("snp", dataset="hsapiens_snp")
out=getBM(attributes=c("refsnp_id","allele","chrom_start"), filters=c("chr_name","chrom_start","chrom_end"), values=list(8,148350,158612), mart=snp)
out[1:5,]
Task9
-
hapmap = useMart("HapMap_rel27", dataset="hm27_variation_yri")
yri = getBM(c("chrom","start","alleles","ref_allele","ref_allele_freq","other_allele_freq"),filters=c("chrom","coding_nonsynon"),values=list("chr19",TRUE),mart=hapmap)
head(yri)
Task10
-
cosmic=useMart("CosmicMart",dataset="COSMIC47")
mut = getBM(c("sample_name","gene_name","aa_mut_syntax","mut_type_cds","zygosity"),filters="sample_name",values=c("MCF7","BT474"),mart=cosmic)
head(mut)
Task11
-
reactome = useMart("REACTOME", dataset="pathway")
ids = getBM(c("referencedatabase_uniprot","_displayname"),filters=c("_displayname","species_selection"),value=list(c("DNA Repair","Signaling by WNT","Muscle contraction"),"Homo sapiens"), mart=reactome)
Task12
-
gramene = useMart("ENSEMBL_MART_ENSEMBL", dataset="athaliana_eg_gene")
getBM(c("ensembl_gene_id","external_gene_id","description","start_position","end_position"), filters=c("chromosome_name","start","end"), values=list("1", "30000","41000"), mart=gramene)
Task13
-
worm = useMart("wormbase195", dataset="wormbase_rnai")
pheno = getBM(c("rnai","phenotype_primary_name"), filters="gene", values="WBGene00006763", mart=worm)
head(pheno)
Task14 : get gene infos
- gene.list = c( "MBNL3", "WNT5A", "NFIA" )
- gs = getBM(attributes=c("hgnc_symbol", "ensembl_gene_id", "chromosome_name", 'strand', "start_position", "end_position", "band"), filter=c("hgnc_symbol"), values=gene.list, mart=ensmart)
archive:
http://useast.ensembl.org/info/website/archives/index.html
## biomart = "ENSEMBL_MART_ENSEMBL" # use archive to get ensembl 65
## biomaRt_dataset = "mmusculus_gene_ensembl" # mouse dataset id name
## host = "dec2011.archive.ensembl.org" # use ensembl 65 for annotation for mm9
## goAnno = "org.Mm.eg.db" # GO annotation database
## mm10
mart = useMart('ENSEMBL_MART_ENSEMBL',dataset='mmusculus_gene_ensembl',host="may2012.archive.ensembl.org")
to get customized seqs
library(biomaRt)
ensmart=useMart("ensembl",dataset="hsapiens_gene_ensembl" ) #, mysql=TRUE)
gene = getGene(id='PITX2', type='hgnc_symbol', mart=ensmart)
seq = getSequence(id="BRCA1", type="hgnc_symbol", seqType="peptide", mart = mart)
show(seq)
seq = getSequence(id="1939_at", type="affy_hg_u95av2", seqType="gene_flank",upstream = 20, mart = mart)
show(seq)
##
## entrez
##
entrez=c("673","7157","837")
getSequence(id = entrez, type="entrezgene",seqType="coding_gene_flank",upstream=10, mart=ensmart)
OR
library(BSgenome.Hsapiens.UCSC.hg19)
toString(subseq(Hsapiens$chr17, 1000000,1000005))
OR
http://useast.ensembl.org/das/Homo_sapiens.GRCh37.reference/sequence?segment=1:100000,110000
(my gene_list is in gene symbol)
>my_mart = useMart("ensembl",dataset="hsapiens_gene_ensembl")
>seq_3utr = getSequence(id = unique(gene.symbol),
type="hgnc_symbol",seqType="3utr",mart = my_mart)
>seq_3utr = seq_3utr[seq_3utr[,"3utr"] != "Sequence unavailable",]
>here: extract longest 3'UTR for each unique gene symbol
>exportFASTA(seq_3utr, file=paste("s3utr.fa",sep=""))
As my project goes, I now need 3'UTR genomic coordinates to get
phastcons conservation for some regions in 3'UTR.
To do that, I first convert hgnc_symbol back to ensembl_gene_id, then
get 3'UTR coordinates using getBM like this:
>s3utr = read.DNAStringSet(paste("s3utr.fa",sep=""),format="fasta")
>gene_names = names(s3utr)
>hgnc2ensembl = getBM(attributes=c("hgnc_symbol","ensembl_gene_id"),
filters="hgnc_symbol", values=gene_names, mart=my_mart)
>s3utr_pos = getBM(attributes=c("ensembl_gene_id",
"chromosome_name","strand","3_utr_start", "3_utr_end"),
filters="ensembl_gene_id", values=as.character(hgnc2ensembl
$ensembl_gene_id), mart=my_mart)
>s3utr_pos = s3utr_pos[complete.cases(s3utr_pos),]
By doing that, now I can only get about 5000 gene symbols with 3'UTR
coordinates (converting from hgnc_symbol back to ensembl_gene_id
looses about 250 genes). I was thinking it might be version
difference? So I tried to use ensembl archive but it gives me error as
below:
> my_mart =
useMart("ensembl_mart_50",dataset="hsapiens_gene_ensembl",archive=T)
Error in value[[3L]](cond) :
Request to BioMart web service failed. Verify if you are still
connected to the internet. Alternatively the BioMart web service is
temporarily down.
In addition: Warning message:
In file(file, "r") : cannot open: HTTP status was '404 Not Found'
Retrieve genomic positions
#R --slave \< ../get-genomic-coordinate-using-biomaRt.R genelist output
args = commandArgs()
gene.list.file = args[3]
output = args[4]
#gene.list.file = "a"
gene.list = read.table( file=gene.list.file, header=F, sep="\t" )
gene.list = as.vector( gene.list$V1 ) # V1 == hgnc_symbol
#library(GenomeGraphs)
library(biomaRt)
#mart = useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")
#----------------
# match to hg19 / NCBI build 37
#----------------
#mart = useMart( biomart="ensembl", dataset = "hsapiens_gene_ensembl")
###----------------
### match to hg18 / NCBI build 36
###----------------
mart = useMart( biomart="ENSEMBL_MART_ENSEMBL", dataset = "hsapiens_gene_ensembl", host = "may2009.archive.ensembl.org", path = "/biomart/martservice", archive = FALSE) # to use
the hg18, NCBI build 36
gs = getBM(attributes=c("hgnc_symbol", "ensembl_gene_id", "chromosome_name", 'strand', "start_position", "end_position", "band"), filter=c("hgnc_symbol"), values=gene.list, mart
=mart)
#gs = getBM(attributes=c("hgnc_symbol", "ensembl_gene_id", "chromosome_name", "start_position", "end_position", "band"), filter=c("hgnc_symbol"), values=gene_symbol, mart=mart)
#att1 = c("ensembl_gene_id", "hgnc_symbol", "chromosome_name", "strand", "start_position", "end_position", "transcript_start", "transcript_end", "band")
#att2 = c("ensembl_gene_id", "5_utr_start", "5_utr_end","3_utr_start", "3_utr_end", "exon_chrom_start", "exon_chrom_end")
#gs1 = getBM(mart=mart, attributes=att1, value=c("hgnc_symbol") ) #, values=gene_symbol)
#gs2 = getBM(mart=mart, attributes=att2 ) #, values=gene_symbol)
#gs= merge(gs1,gs2)
#gs = unique( gs )
#gene.list = c( "EIF4ENIF1" )
out = NULL
notMatch = NULL
for( symbol in gene.list ) {
## this can be multi-lines
info = gs[gs$hgnc_symbol == symbol,][,c(1,3:6)]
if( length(info) == 0 ) {
notMatch = c( notMatch, symbol )
} else {
chr = as.vector( info$chromosome_name )
out = rbind( out, info[grep( "^[0-9XY]", chr ),] )
}
}
print( out )
#out[ out$chromosome_name == "X",]$chromosome_name = "23"
#out[ out$chromosome_name == "Y",]$chromosome_name = "24"
write.table( out, file=output, row.names=F, col.names=F, sep="\t", quot=F )
to get the attributes from multiple pages of the same mart.
library("biomaRt")
mart = useMart("ensembl")
ensembl = useDataset("hsapiens_gene_ensembl", mart = mart)
p = c("200641_s_at" , "229411_at" , "223376_s_at")
Q1 = getBM(attributes =
c("affy_hg_u133_plus_2","entrezgene","ensembl_gene_id"),
filters = "affy_hg_u133_plus_2", mart=ensembl, values=p)
Q2 = getBM(attributes=c("validated","ensembl_gene_id"),
filters="affy_hg_u133_plus_2", mart=ensembl, values=p)
## clean up:
## Q1 = subset(Q1, !is.na(entrezgene))
Q = merge(Q1,Q2)
SNPs
library("biomaRt")
snp = useMart("snp", dataset="hsapiens_snp")
ensemblids = c("ENSG00000204296")
out = getBM(attributes=c("refsnp_id","chr_name","chrom_start",
"ensembl_gene_stable_id","validated",
"consequence_type_tv"),
filters=c("ensembl_gene"), values=c(ensemblids), mart=snp)
head(out)
refsnp_id chr_name chrom_start ensembl_gene_stable_id validated
1 rs517922 6 32258836 ENSG00000204296 hapmap
2 rs3117133 6 32313653 ENSG00000204296 cluster,freq,1000Genome
3 rs6621681 6 32292217 ENSG00000204296
4 rs6621681 6 32292217 ENSG00000204296
5 rs6621682 6 32292221 ENSG00000204296
6 rs6621682 6 32292221 ENSG00000204296
consequence_type_tv
1 DOWNSTREAM
2 INTRONIC
3 INTRONIC
4 NON_SYNONYMOUS_CODING
5 INTRONIC
6 SYNONYMOUS_CODING
Search TFBS agains promoter regions (~up/dn2K from TSS)
IUPAC Code Meaning Complement
A A T
C C G
G G C
T/U T A
M A or C K
R A or G Y
W A or T W
S C or G S
Y C or T R
K G or T M
V A or C or G B
H A or C or T D
D A or G or T H
B C or G or T V
N G or A or T or C N
[kwangmin@10 MSigDB-v3.0]$ grep PITX2 c3.tft.v3.0.symbols.gmt.morifSeq.txt
V$PITX2_Q2 CTCNANGTGNY
GGATTA_V$PITX2_Q2 YATGNWAAT
[kwangmin@10 MSigDB-v3.0]$ grep SOX9 c3.tft.v3.0.symbols.gmt.morifSeq.txt
V$SOX9_B1 SKYTAAAAATAACYCH
CATTGTYY_V$SOX9_B1 YATGNWAAT
[kwangmin@10 MSigDB-v3.0]$ grep PITX2_Q2 c3.tft.v3.0.symbols.gmt
library( biomaRt )
library(BSgenome.Hsapiens.UCSC.hg19)
#ensmart = useMart( biomart="E", dataset = "hsapiens_gene_ensembl" )
ensmart = useMart( biomart="ensembl", dataset="hsapiens_gene_ensembl")
system( "awk -F\"\t\" '{ if($4 > 2.5 && $5 > 2.5) print $2}' ~/Desktop/shSOX9_shPITX2-vs-shControl-BH5pct-FC1.csv | sort | uniq > up" )
system( "awk -F\"\t\" '{ if($4 < 0.5 && $5 < 0.5) print $2}' ~/Desktop/shSOX9_shPITX2-vs-shControl-BH5pct-FC1.csv | sort | uniq > dn" )
#gene.list = c( "SOX9", "PITX2", "RORA" )
#gene.list = as.vector( read.table( file="up")$V1 )
gene.list = as.vector( read.table( file="dn")$V1 )
#gs = getBM(attributes=c("hgnc_symbol", "ensembl_gene_id", "chromosome_name", 'strand', "start_position", "end_position", "band"), filter=c("hgnc_symbol"), values=gene.list, mart=ensmart)
## ---> not gene start/end, but transcript_start/end for TSS.
## use only complete chromosome
##
gs = gs[ grep( "^[0-9|X|Y]", gs$chromosome_name, perl=T), ]
g.sym = gs$hgnc_symbol
g.str = gs$strand
g.start = gs$start_position
g.end = gs$end_position
g.chr = paste( "chr", gs$chromosome, sep="")
##
## getSequence is cool for get up2k, but not dn2k.
##
## getSequence(id=gene.list, type="hgnc_symbol", seqType="coding_gene_flank", upstream = 20, mart = ensmart)
##
## up-stream from TSS
##
promoter = 2000
g.seq.up = NULL
g.seqs.up = NULL
for( i in 1:length(g.sym) ) {
if( g.str[i] == 1 ) {
g.seq.up = toString(subseq(Hsapiens[[g.chr[i]]], g.start[i]-promoter, g.start[i] ))
g.seqs.up = c( g.seqs.up, g.seq.up )
} else if( g.str[i] == -1 ) {
g.seq.up = toString( reverseComplement( subseq(Hsapiens[[g.chr[i]]], g.end[i], g.end[i]+promoter)) )
g.seqs.up = c( g.seqs.up, g.seq.up )
}
}
##
## down-stream from TSS
##
g.seq.dn = NULL
g.seqs.dn = NULL
for( i in 1:length(g.sym) ) {
if( g.str[i] == 1 ) {
g.seq.dn = toString(subseq(Hsapiens[[g.chr[i]]], g.start[i], g.start[i]+promoter ))
g.seqs.dn = c( g.seqs.dn, g.seq.dn )
} else if( g.str[i] == -1 ) {
g.seq.dn = toString( reverseComplement( subseq(Hsapiens[[g.chr[i]]], g.end[i]-promoter, g.end[i])) )
g.seqs.dn = c( g.seqs.dn, g.seq.dn )
}
}
search.table = as.data.frame( cbind( g.sym, g.chr, g.str, g.seqs.up, g.seqs.dn ) )
#patterns = c( 'AAAAAA' )
#matchPattern( DNAString("G[:upper:]G"), DNAString( search.table$g.seqs.up[1]) , fixed=FALSE) # 3 hits on strand +
#[kwangmin@10 MSigDB-v3.0]$ grep PITX2 c3.tft.v3.0.symbols.gmt.morifSeq.txt
#V$PITX2_Q2 CTCNANGTGNY
#GGATTA_V$PITX2_Q2 YATGNWAAT
#[kwangmin@10 MSigDB-v3.0]$ grep SOX9 c3.tft.v3.0.symbols.gmt.morifSeq.txt
#V$SOX9_B1 SKYTAAAAATAACYCH
#CATTGTYY_V$SOX9_B1 YATGNWAAT
##
## for detailed match position, use matchPattern()
##
# matchPattern( DNAString("AAAMCT"), DNAString( search.table$g.seqs.up[1]) , fixed=F) # IUPAC 3 hits on strand
##
## IUPAC ambiguity codes
##
pattern = c( "[A|T][A|T]CAA[A|T]G", "CTCANGTGNY", "YATGNWAAT", "SKYTAAAAATAACYCH" )
##
## converted into regular expression for grep( perl=T)
##
pattern = c( "[A|T][A|T]CAA[A|T]G", "CTCA[G|A|T|C]GTG[G|A|T|C][C|T]", "[C|T]ATG[G|A|T|C][A|T]AAT", "[C|G][G|T][C|T]TAAAAATAAC[C|T]C[A|C]" )
up = NULL
dn = NULL
upPromPatt = NULL
dnPromPatt = NULL
upProm = NULL
dnProm = NULL
for( i in 1:length(pattern)) {
promoter.up = grep( pattern[i], search.table$g.seqs.up, perl=T)
if( length(promoter.up) > 0) {
for( j in promoter.up) {
upPromPatt = c( upPromPatt, pattern[i])
upProm = c( upProm, as.vector(search.table$g.sym[j]) )
#print( upProm )
}
up = rbind( up, cbind( upPromPatt, upProm ) )
upPromPatt = NULL
upProm = NULL
}
promoter.dn = grep( pattern[i], search.table$g.seqs.dn, perl=T)
if( length(promoter.dn) > 0 ) {
for( k in promoter.dn) {
dnPromPatt = c( dnPromPatt, pattern[i])
#print( dnPromPatt)
dnProm = c( dnProm, as.vector(search.table$g.sym[k]) )
}
dn = rbind( dn, cbind( dnPromPatt, dnProm ) )
dnPromPatt = NULL
dnProm = NULL
}
}
#write.table( up, file="shSOX9-and-shPITX2-both-2.5xUP-up2K-search.xls", sep="\t", row.names=F, quote=F)
#write.table( dn, file="shSOX9-and-shPITX2-both-2.5xUP-dn2K-search.xls", sep="\t", row.names=F, quote=F)
write.table( up, file="shSOX9-and-shPITX2-both-0.5xDN-up2K-search.xls", sep="\t", row.names=F, quote=F)
write.table( dn, file="shSOX9-and-shPITX2-both-0.5xDN-dn2K-search.xls", sep="\t", row.names=F, quote=F)
q()
bsgenome.Hsapiens.UCSC package
Examples
http://svitsrv25.epfl.ch/R-doc/library/Biostrings/html/reverse.html
Get Exon seq from ID
t = read.csv( file=file, sep="\t", header=T)
exon.id = as.character(t[,28] )
#exon.id = exon.id[1:4]
#exon.id[5] = "jlkfljs"
exon.id.all = unlist( strsplit( exon.id, "[|]", perl=T) )
# listDatabasets( e65 )
# listFilters( e65 )
# listAttributes( e65 )
myMart = getBM( attributes=c("ensembl_exon_id","gene_exon"), mart=e65, filter=c("ensembl_exon_id"), value=exon.id.all )
exon.seqs = NULL
for( exon in exon.id ) {
array = unlist( strsplit( exon, "[|]", perl=T) )
if( length(array) == 0 ) {
exon.seqs = c( exon.seqs, " | -" )
} else {
concat = NULL
for( id in array ) {
seq = myMart[ grep( paste("^",id,"$",sep=""), myMart$ensembl_exon_id), ]$gene_exon
if( length(seq) == 0 ) {
seq = "-"
}
concat = paste( concat, seq, sep=" | " )
}
exon.seqs = c( exon.seqs, concat )
}
}
t$exon.seqs = exon.seqs
이메일로 전송BlogThis!X에 공유Facebook에서 공유
라벨:
Informatics
source( "http://www.bioconductor.org/biocLite.R") biocLite(’biomaRt’)
library(biomaRt) listMarts()
ensembl = useMart("ensembl") datasets = listDatasets(ensembl) ensembl = useMart("ensembl",dataset="hsapiens_gene_ensembl") attributes=listAttributes(ensembl) filters=listFilters(ensembl)
affyids = c("211550_at","202431_s_at","206044_s_at") annotation = getBM(c("affy_hg_u133_plus_2","ensembl_gene_id","hgnc_symbol","chromosome_name","start_position","end_position","band","strand"), filters="affy_hg_u133_plus_2", values=affyids,mart = ensembl) print(annotation)
illuminaIDs = c("ILMN_1728071","ILMN_1662668") goAnnot = getBM(c("illumina_humanwg_6_v2", "go_biological_process_id","go_biological_process_linkage_type"), filters="illumina_humanwg_6_v2", values=illuminaIDs, mart = ensembl) print(goAnnot[1:5,])
diab=getBM(c("ensembl_gene_id","hgnc_symbol"),filters=c("mim_morbid_accession","go"), values=list(c("125853","222100"),"GO:0003700"),mart=ensembl) print(diab)
miRNA = getBM(c("mirbase_id","ensembl_gene_id","start_position","chromosome_name"), filters=c("chromosome_name","with_mirbase"), values=list(13,TRUE), mart=ensembl) miRNA[1:5,]
filterOptions("snptype_filters",ensembl) entrez = getBM("entrezgene",filters=c("chromosome_name","snptype_filters"), values=list(22,"NON_SYNONYMOUS_CODING"),mart=ensembl) entrez[1:5,]
seq = getSequence(id="CDH1", type="hgnc_symbol",seqType="gene_exon", mart = ensembl) seq[1,]
promoter=getSequence(id=c("APC","CUL1"), type="hgnc_symbol",seqType="coding_gene_flank",upstream =2000, mart=ensembl)
human=useMart("ensembl", dataset="hsapiens_gene_ensembl") chicken=useMart("ensembl", dataset="ggallus_gene_ensembl") mapping = getLDS(attributes=c("affy_hg_u95av2","hgnc_symbol"), filters="affy_hg_u95av2", values=c("1888_s_at","1434_at"),mart=human,attributesL="affy_chicken", martL=chicken)
listMarts(host="may2009.archive.ensembl.org/biomart/martservice/") ensembl54=useMart("ENSEMBL_MART_ENSEMBL", host="may2009.archive.ensembl.org/biomart/martservice/") listDatasets(ensembl54) ensembl54=useMart("ENSEMBL_MART_ENSEMBL", host="may2009.archive.ensembl.org/biomart/martservice/", dataset='hsapiens_gene_ensembl')
snp=useMart("snp", dataset="hsapiens_snp") out=getBM(attributes=c("refsnp_id","allele","chrom_start"), filters=c("chr_name","chrom_start","chrom_end"), values=list(8,148350,158612), mart=snp) out[1:5,]
hapmap = useMart("HapMap_rel27", dataset="hm27_variation_yri") yri = getBM(c("chrom","start","alleles","ref_allele","ref_allele_freq","other_allele_freq"),filters=c("chrom","coding_nonsynon"),values=list("chr19",TRUE),mart=hapmap) head(yri)
cosmic=useMart("CosmicMart",dataset="COSMIC47") mut = getBM(c("sample_name","gene_name","aa_mut_syntax","mut_type_cds","zygosity"),filters="sample_name",values=c("MCF7","BT474"),mart=cosmic) head(mut)
reactome = useMart("REACTOME", dataset="pathway") ids = getBM(c("referencedatabase_uniprot","_displayname"),filters=c("_displayname","species_selection"),value=list(c("DNA Repair","Signaling by WNT","Muscle contraction"),"Homo sapiens"), mart=reactome)
gramene = useMart("ENSEMBL_MART_ENSEMBL", dataset="athaliana_eg_gene") getBM(c("ensembl_gene_id","external_gene_id","description","start_position","end_position"), filters=c("chromosome_name","start","end"), values=list("1", "30000","41000"), mart=gramene)
worm = useMart("wormbase195", dataset="wormbase_rnai") pheno = getBM(c("rnai","phenotype_primary_name"), filters="gene", values="WBGene00006763", mart=worm) head(pheno)
Scientist. Husband. Daddy. --- TOLLE. LEGE
외부자료의 인용에 있어 대한민국 저작권법(28조)과 U.S. Copyright Act (17 USC. §107)에 정의된 "저작권물의 공정한 이용원칙 | the U.S. fair use doctrine" 을 따릅니다. 저작권(© 최광민)이 명시된 모든 글과 번역문들에 대해 (1) 복제-배포, (2) 임의수정 및 자의적 본문 발췌, (3) 무단배포를 위한 화면캡처를 금하며, (4) 인용 시 URL 주소 만을 사용할 수 있습니다. [후원 | 운영] [대문으로] [방명록] [옛 방명록] [티스토리 (백업)] [신시내티]
-