<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML">
</script>
本文于r format(Sys.Date(), "%Y-%m-%d")更新。 如发现问题或者有建议,欢迎提交 Issue
{r setup, include=FALSE} knitr::opts_chunk$set(eval = FALSE)
主要内容
- 数据(data)
- 映射(mapping)aesthetic attributes, 包括颜色、形状、大小等
- 几何对象(geom)geometric object, 包括点、线、条形等
- 统计变换(stat)statistical transformation
- 坐标系(coord)coordinate system
- 分面(facet)
- 标度(scale)
- 主题(theme)
- 位置(position)
- 存储和输出
结构参考 @安建才2018 。
新增
geom_segment可以在图中做很多箭头的改进- How to draw an arrow in the middle of the segment?
以数据mtcars为例。
{r} library(skimr) skim(mtcars) head(mtcars)
data
用%+%更换数据
{r} library(tidyverse) p <- mtcars %>% ggplot(aes(mpg, wt, col = cyl)) + geom_point() p
{r} p %+% (mtcars %>% mutate(mpg = mpg^2))
注意看x轴的比例尺子都变换了。
mapping
mapping 和 geom_*不同之处
{r} mtcars %>% ggplot(aes(mpg, wt, col = 'red')) + geom_point() mtcars %>% ggplot(aes(mpg, wt)) + geom_point(col = 'red')
注意看到第一幅图,因为是对data进行color,因此系统默认这是一个legend,然而在geom_*中的设定是不考虑的,因此没有legend。 <!-- 这里可以加一行代码将`legend`删除。 -->{=html}
<!-- ``` -->
<!-- mtcars %>% -->
<!-- ggplot(aes(mpg, wt, col = 'red')) + -->
<!-- geom_point() + -->
<!-- theme(legend.key = element_blank(), -->
<!-- legend.text = element_blank(), -->
<!-- legend.title = element_blank()) -->
<!-- ``` -->
cut_*
cut_interval(x, n)分成n个bin,其中每个bin的长度相等。cut_width(x, width)分bin,bin的长度为width。cut_number(x, n = 10)分10个bin,bin之间数量近似相等
cut_number的优化
cut_number makes n groups with (approximately) equal numbers of observations. 因此不好。 这里bin = round(fst_givenamount/500)*500给到的是,离值最近的500的bin。 这些都不好,而是把非常高的剔除来,$n-2$的分箱。
有时候某个值占比特别高,比如30%,切10bins时根本不可能靠cut_number提取。 可以把这个值单独做一个bin,然后,再取用8bins。
geom
误差线比较好的总结
参考 http://mp.weixin.qq.com/s/lGVgCsm1sMRdVOUJWiJQTg
离散型变量+区间: geom_errorbar(), geom_linerange() 离散型变量+区间+中间值: geom_crossbar(), geom_pointrange() 连续型变量+区间: geom_ribbon() 连续型变量+区间+中间值: geom_smooth(stat="identity")
geom_crossbar
{r} data.frame( trt = factor(c(1, 1, 2, 2)), resp = c(1, 5, 3, 4), group = factor(c(1, 2, 1, 2)), upper = c(1.1, 5.3, 3.3, 4.2), lower = c(0.8, 4.6, 2.4, 3.6) ) %>% ggplot(aes(trt, resp, colour = group)) + geom_crossbar(aes(ymin = lower, ymax = upper), width = 0.2)
也就是均值上下加上标准差之类的误差量。 非常直观,适合数据处理完后的展示。
geom_errorbarh
geom_ $\to$ error $\to$ bar $\to$ h
```{r} df <- data.frame( trt = factor(c(1, 1, 2, 2)), resp = c(1, 5, 3, 4), group = factor(c(1, 2, 1, 2)), se = c(0.1, 0.3, 0.3, 0.2) )
Define the top and bottom of the errorbars
p <- ggplot(df, aes(resp, trt, colour = group)) p + geom_point() + geom_errorbarh(aes(xmax = resp + se, xmin = resp - se))
#### `psych`更好的选择
可以多变量展示数据,具体参考
[因子分析 Factor Analysis](https://jiaxiangli.netlify.com/2018/10/factor-analysis/)
```
library(psych)
library(data.table)
gcbs <-
fread('../../../picbackup/gcbs.csv'
,encoding = 'UTF-8') %>%
as.data.frame()
# error.dots 否则 error.dots 报错
# Error in `[.data.frame`(x, i) : undefined columns selected
error.bars(gcbs)
error.dots(gcbs,var=names(gcbs))
geom_ribbon
{r} LakeHuron
```{r} # Generate data huron <- data.frame(year = 1875:1972, level = as.vector(LakeHuron)) h <- ggplot(huron, aes(year))
Add aesthetic mappings
h + geom_ribbon(aes(ymin = level - 1, ymax = level + 1), fill = “grey70”) + geom_line(aes(y = level))
这就是欧巴需要的误差曲线图。
### 加入虚线
geom_line(aes(linetype=supp))
### geom_segment
```
library(tidyverse)
# 生成测试数据
df=data.frame(
Phylum=c("Ruminococcaceae","Bacteroidaceae","Eubacteriaceae","Lachnospiraceae","Porphyromonadaceae"),
GroupA=c(37.7397,31.34317,222.08827,5.08956,3.7393),
GroupB=c(113.2191,94.02951,66.26481,15.26868,11.2179)
)
# 计算连线起始点Y轴坐标,即累计丰度的值
link_dat <- df %>%
arrange(by=desc(Phylum)) %>%
mutate(GroupA=cumsum(GroupA), GroupB=cumsum(GroupB))
# 数据格式转换,宽表格转换为ggplot2使用的长表格
df.long <- df %>% gather(group, abundance, -Phylum)
# 或者使用reshape2的melt函数
# df.long <- reshape2::melt(df, value.name='abundance', variable.name='group')
# 绘图,堆叠柱状图+组间连线
ggplot(df.long, aes(x=group, y=abundance, fill=Phylum)) +
geom_bar(stat = "identity", width=0.5, col='black') +
geom_segment(data=link_dat, aes(x=1.25, xend=1.75, y=GroupA, yend=GroupB))
cumsum配合geom_bar()中的stat = "identity"使用,表达累积状态。 geom_segment作为连线,x的坐标跟geom_bar中的width设置有关, 第一个等于1+width/2, 第二个等于2-width/2。
```{r} # 画三个组间比较 library(reshape2)
读生一个测试数据宽表格
df=data.frame( Phylum=c(“Ruminococcaceae”,“Bacteroidaceae”,“Eubacteriaceae”,“Lachnospiraceae”,“Porphyromonadaceae”), GroupA=c(37.7397,31.34317,222.08827,5.08956,3.7393), GroupB=c(113.2191,94.02951,66.26481,15.26868,11.2179), GroupC=c(123.2191,94.02951,46.26481,35.26868,1.2179) )
melt转换为长表格为ggplot2绘图通用格式
geom_segment添加直线和曲线,arrange按门水平名称字母降序排列;cumsum先将数值累计,再用mutate取代;现在己有两组间的高度位置,再设置X轴位置1.25, 1.75, 和Y位置
ggplot(melt(df), aes(x=variable, y=value, fill=Phylum)) + geom_bar(stat = “identity”, width=0.5, col=‘black’) + theme_classic()+ geom_segment(data=df %>% arrange(by=desc(Phylum)) %>% mutate(GroupA=cumsum(GroupA)) %>% mutate(GroupB=cumsum(GroupB)), aes(x=1.25, xend=1.75, y=GroupA, yend=GroupB))+ geom_segment(data=df %>% arrange(by=desc(Phylum)) %>% mutate(GroupB=cumsum(GroupB)) %>% mutate(GroupC=cumsum(GroupC)), aes(x=2.25, xend=2.75, y=GroupB, yend=GroupC)) # 添加theme_classic()修改主题样式,这个经典主题我更喜欢 # x和xend分别为起始和终止,1,2组间X值起始分别为1.25和1.75,2,3组间则为2.25和2.75
多加一条`geom_segment`完成三组比较。
```
# 三组或更多组的画法,只需添加数据即可
library(tidyverse)
df <- data.frame(
Phylum=c("Ruminococcaceae","Bacteroidaceae","Eubacteriaceae","Lachnospiraceae","Porphyromonadaceae"),
GroupA=c(37.7397,31.34317,222.08827,5.08956,3.7393),
GroupB=c(113.2191,94.02951,66.26481,15.26868,11.2179),
GroupC=c(123.2191,94.02951,46.26481,35.26868,1.2179),
GroupD=c(37.7397,31.34317,222.08827,5.08956,3.7393)
)
df.long <- df %>% gather(group, abundance, -Phylum)
# 组间连线数据:
# 假设第一列是Phylum
link_dat <- df %>%
arrange(by=desc(Phylum)) %>%
mutate_if(is.numeric, cumsum)
bar.width <- 0.7
link_dat <- link_dat[, c(1,2,rep(3:(ncol(link_dat)-1),each=2), ncol(link_dat))]
link_dat <- data.frame(y=t(matrix(t(link_dat[,-1]), nrow=2)))
link_dat$x.1 <- 1:(ncol(df)-2)+bar.width/2
link_dat$x.2 <- 1:(ncol(df)-2)+(1-bar.width/2)
ggplot(df.long, aes(x=group, y=abundance, fill=Phylum)) +
geom_bar(stat = "identity", width=bar.width, col='black') +
geom_segment(data=link_dat,
aes(x=x.1, xend=x.2, y=y.1, yend=y.2), inherit.aes = F)
这是多组的方案。
geom_errorbar
ggplot中插入geom_errorbar 可以帮助我们,表现一次回归函数的,均值和方差。
```{r message=FALSE, warning=FALSE} library(tidyverse)
df <- data.frame(x = 1:10, y = 1:10, ymin = (1:10) - runif(10), ymax = (1:10) + runif(10), xmin = (1:10) - runif(10), xmax = (1:10) + runif(10))
p <- ggplot(data = df,aes(x = x,y = y)) + geom_point() + geom_errorbar(aes(ymin = ymin,ymax = ymax)) + geom_errorbarh(aes(xmin = xmin,xmax = xmax))
p
### How to draw an arrow in the middle of the segment?
```
arrow_data <- data.frame(x1 = 2.62, x2 = 3.57, y1 = 21.0, y2 = 15.0)
mtcars %>%
ggplot(aes(x = wt, y = mpg)) +
geom_point() +
geom_curve(
aes(x = x1, y = y1, xend = x2, yend = y2, col = "red"), data = arrow_data
) +
geom_segment(
aes(x = x1, y = y1, xend = x2, yend = y2, col = "green"), arrow = arrow(), data = arrow_data
)
下面的方式是把箭头放在中间,原理就是把两条线分两半进行重建。
{r} mtcars %>% ggplot(aes(x = wt, y = mpg)) + geom_point() + geom_curve( aes(x = x1, y = y1, xend = x2, yend = y2, col = "curve"), data = arrow_data ) + geom_segment( aes(x = (x1+x2)/2, y = (y1+y2)/2, xend = x2, yend = y2, col = "segment"), data = arrow_data ) + geom_segment( aes(x = x1, y = y1, xend = (x1+x2)/2, yend = (y1+y2)/2, col = "segment"), arrow = arrow(), data = arrow_data, show.legend=FALSE )
geom_segment可以在图中做很多箭头的改进
{r eval=F} @online{Dancho2017Product, author = {Matt Dancho}, title = {Sales Analytics: How To Use Machine Learning To Predict And Optimize Product Backorders}, year = 2017, url = {http://www.business-science.io/business/2017/10/16/sales_backorder_prediction.html}, urldate = {2018-05-02} }
{r} mtcars %>% ggplot(aes(x = mpg, y = disp, col = factor(cyl))) + geom_point() + geom_segment(aes(x= 12.5, y = 400, xend = 12.5+0.5, yend = 400+20), size = 1) + annotate("text", x = 15-1, y = 420, size = 3, label = "蓝\n色")
ggplot交互
鼠标点击图标,会有解释。 参考Make ‘ggplot2’ Graphics Interactive <U+2022> ggiraph package 。
{r message=FALSE, warning=FALSE, include=FALSE} library(ggiraph) library(tidyverse)
{r} ggiraph( (mtcars %>% ggplot(aes(x = mpg, y = disp, col = as.factor(cyl))) + geom_point_interactive(aes(tooltip = paste("cyl = ",cyl)), size = 2) ) %>% print() )
tooltip鼠标点击的标签。
aes_string和names在ggplot中的使用
mlp_p_add_15 <- mlp_p
for (i in 1:15){
mlp_p_add_15 <- mlp_p_add_15 +
geom_line(data = mlp_model_pred_table,
aes_string(y = names(mlp_model_pred_table)[i]))
}
mlp_p_add_15
geom_ma()
tidyquant包 [@tidyquant] 提供了geom_ma函数,方便画移动平滑线。
library(tidyquant)
AAPL <- tq_get("AAPL")
AAPL %>%
ggplot(aes(x = date, y```` = adjusted)) +
geom_line() + ## Plot stock price
geom_ma(n = 50) + ## Plot 50-day Moving Average
geom_ma(n = 200, color = "red") + ## Plot 200-day Moving Average
coord_x_date(xlim = c(today() - weeks(12), today()),
ylim = c(100, 130)) ## Zoom in
geom_scatterpie()
@杜雨连环饼图 采用scatterpie包的geom_scatterpie函数构建多个饼状图。
{r} library(tidyverse) library(scatterpie) library(Cairo) pie_data <- data.table::data.table( Year = 2004:2011, Dummy = seq(5,40,5), Data = rnorm(n = 8,mean = 10,sd = 5), S1 = c(1,1,3,1,2,3,4,2), S2 = c(1,1,2,1,2,2,2,1), S3 = c(1,1,3,1,4,3,4,1), S4 = c(1,1,5,1,5,5,2,.5), S5 = c(1,2,5,1,1,5,1,.5) ) color1<-c("#FF2D2D","#F79646","#4BACC6","#FFC000","#92D050") color2<-c("#17375E","#23538D","#558ED5","#8EB4E3","#C6D9F1")
{r} pie_data %>% ggplot() + geom_line(aes(x=Dummy,y=Data,group=1),col="#085264",size=.8) + geom_scatterpie(data=pie_data,aes(x=Dummy,y=Data,r=Dummy/10),cols=colnames(pie_data)[4:8],color=NA) + # ylim(0,25)+ scale_fill_manual(values=color1) + scale_x_continuous(breaks=pie_data$Dummy,labels=c(2004:2011)) + guides( fill=guide_legend(label.position ="top"))+ theme( axis.title=element_blank(), legend.title=element_blank(), panel.background=element_blank(), axis.line=element_line(), axis.ticks=element_line(), legend.direction="horizontal", legend.position=c(0.15,0.9), ) + coord_equal()
ylim(0,25)的设置保持了圆不变成椭圆。coord_equal()也可以完成这个目的,但是可能存在图例遮挡的情况。r$\in [1,10]$比较合理,不然太大太小,这里可以设置变量
stat
统计变换即对数据进行统计变化, 通常以某种方式对数据信息进行汇总。
stat_unique
{r} ggplot(mtcars, aes(vs, am)) + geom_point(alpha = 0.1) ggplot(mtcars, aes(vs, am)) + geom_point(alpha = 0.1, stat = "unique")
注意看颜色。 后面一个图stat = "unique"加入后,明显颜色一直很浅,是因为alpha = 0.1。 然而第一幅图左上角很深,是因为有很多重复值。
stat_summary
{r} d <- ggplot(mtcars, aes(cyl, mpg)) + geom_point() d + stat_summary(fun.data = "mean_cl_boot", colour = "red", size = 2) d + stat_summary(fun.y = "median", colour = "red", size = 2, geom = "point")
stat_qq_line
- 相关理论见Q-Q 图理解
- 安装报错见安装包报错的debug步骤
{r eval=F} # devtools::install_github("tidyverse/ggplot2") library("ggplot2") library("caret") df <- data.frame(y = rt(200, df = 5)) p <- ggplot(df, aes(sample = y)) p + stat_qq() + stat_qq_line()
QQ Plot 查看分类变量下不同分布 [Kassambara2016,pp.59]
{r eval=F} library(tidyverse) p <- ggplot(mtcars, aes(sample=mpg)) # Basic plot p + stat_qq() # Change point shapes by groups # Use custom color palettes p + stat_qq(aes(shape = as.factor(cyl), color = as.factor(cyl)))+ scale_color_manual(values=c("#00AFBB", "#E7B800", "#FC4E07"))
facet
横纵的选择
{r} library(tidyverse) mtcars %>% ggplot(aes(x = wt, y = qsec)) + geom_point() + facet_wrap(~ cyl, scales = 'free', ncol = 1)
ncol = 1用于facet_wrap,可以限定横纵的选择。
scale
理解方式
scales 衡量数据的映射方式,举例方便理解 [@宏基因组scales]。
{r} mpg %>% ggplot(aes(x = displ, y = hwy)) + geom_point(aes(col = class)) # scale_x_continuous() + # scale_y_continuous() + # scale_color_discrete()
注意最后三个scale_*条件是默认的,一一解释,
scale_x_continuous(): 以连续变量展示,但是因为x本身是连续变量,因此转换为本身scale_y_continuous(): 以连续变量展示,但是因为x本身是连续变量,因此转换为本身scale_color_discrete():col变量class作为颜色,以离散变量展示
以下做一些变换,
{r} mpg %>% ggplot(aes(x = displ, y = hwy)) + geom_point(aes(col = class)) + scale_x_sqrt(quote(a + M ^ e)) + scale_color_brewer()
scale_x_sqrt():x按照$\sqrt{x}$的方式展示。scale_color_brewer(): 颜色按照这一规则映射,scale_colour_brewer(..., type = "seq", palette = 1, direction = 1),参数type = "seq"表示按照连续变量处理,因此颜色数连续的蓝色。quote(a + M ^ e)可以输入简易公式。
调整比例尺
labels = comma算一种。
{r} diamonds %>% ggplot(aes(x = carat, y = price)) + geom_point() + scale_x_log10() + scale_y_log10() + coord_trans(y = scales::exp_trans(10)) + theme(legend.position = 'none')
scales::exp_trans(10)相当于对$y$的比例尺进行转换
$$y \to 10^y$$
给时间序列自定义标签
使用ggplot2给时间序列作图时,有自定义时间标签的需求,比如希望时间标签更细致,举例。
模拟建立数据
{r} library(tidyverse) library(lubridate) data_table <- data_frame(a = seq(1:300), b = today() + a) head(data_table)
{r} data_table %>% ggplot(aes(x = b, y = a)) + geom_point()
想看更加细致的月份数据
使用scale_x_date, 参考 * Time Series 05: Plot Time Series with ggplot2 in R – NEON Data Skills
{r} data_table %>% ggplot(aes(x = b, y = a)) + geom_point() + scale_x_date(date_breaks = '1 month')
limits看最近时间
scale_x_date(limits = c(as.Date('2018-07-27'), Sys.Date()))
建立更加整洁的标签
转置标签方向。标签,转置标签方向。
{r} data_table %>% ggplot(aes(x = b, y = a)) + geom_point() + scale_x_date(date_breaks = '1 month') + theme(axis.text.x = element_text(angle = 70, hjust = 1))
xlim中写<符号
如mutate(bin = fct_reorder(paste0("<=",as.factor(as.numeric(bin3))),as.numeric(bin3))) %>%中 使用paste0函数。
自定义轴坐标的bin和命名
axs + scale_x_continuous(breaks = c(2000, 4000), labels = c("2k", "4k")
修改legend
scale_colour_discrete(name = "图例名称", breaks = c("图例名称1","图例名称2"), labels = c("图例新名称1","图例新名称1"))
stack图打标签的方法
{r eval=FALSE, include=FALSE} ... ggplot(aes(x = bin_income_2year, y = value, col = key, fill = key)) + geom_bar(position = "stack", stat='identity', alpha = 0.25) + geom_text(aes(label=pctg), position=position_stack(), size=4) + theme(axis.text.x = element_text(angle = 70, hjust = 1))
关键是position=position_stack()。 但是会改变y轴的刻度。
横线加标签
- R ggplot2: Labelling a horizontal line on the y axis with a numeric value - Stack Overflow 横线加标签。 用于用户价值取上下限的标签,样本占比。
theme
最好看的集成方案
{r} theme_ilo <- function(){ theme_minimal() + theme( # text = element_text(family = "Bookman", color = "gray25"), plot.subtitle = element_text(size = 9), plot.caption = element_text(color = "gray30"), # plot.background = element_rect(fill = "gray95"), plot.margin = unit(c(5, 10, 5, 10), units = "mm"), axis.title.x = element_text(size=12,face = "bold"), axis.title.y = element_text(size=12,face = "bold"), # x和y的命名,要加粗,ppt才好看 axis.text.x = element_text(size=7, angle = 70, hjust = 1), # 控制axis字体大小,7号大小最好 axis.text.y = element_text(size=7), legend.title=element_blank() ) }
theme_ilo() +
scale_fill_brewer(palette = "Greens") +
# 为了分层好看
scale_color_brewer(palette = "Greens") +
# 为了分层好看
theme(text=element_text(family="STKaiti"))
中文乱码解决方案
theme(text=element_text(family="STKaiti")) + 加入这样一行代码就好了,因为限定了text中, element_text的属性为family="STKaiti"。 这样就可以处理中文乱码的问题了。
Error in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : Polygon edge not found
-
去font book 里面选择好字体,大多数原因是没有这个字体[@Zdeb2016; @awags12018]。
-
一般是RMarkdown 无法Preview,因此先保存图片,再
knitr::include_*# 看来中文显示出现了问题。 # geom_text(family=“STKaiti”) + 另外指定 # theme(text=element_text(family=“STKaiti”)) 没有指定元素是中文。
参考 这篇CSDN博客
不显示legend
{r} mtcars %>% ggplot(aes(x = wt, y = qsec, col = as.factor(cyl))) + geom_point() + theme(legend.position = 'none')
theme(legend.position = 'none')中legend.position = 'none'使得不显示legend,因为有时候就是多余。
一个集成方案
{r} mpg %>% ggplot(aes(x = cty, y = hwy, col = as.factor(cyl))) + geom_jitter() + geom_abline(col = "grey50", size = 2) + # 添加labs labs( x = "City mileage/galleon", y = "Highway mileage/gallon", title = "Highway and city mileage are highed correlated", subtitle = "add some comments", color = "Change legend name", caption = "Data Source: mpg ) + # 点的颜色真难看。 scale_color_brewer(type = "seq", palette = "Spectral") + # adj. [物]光谱的;幽灵的;鬼怪的 # /'sp<U+025B>ktr<U+0259>l/ # 下面开始修改theme,不能忍受背景是灰色的 theme_minimal() + theme( plot.title = element_text(face = "bold", size = 12), # bold好看些 plot.subtitle = element_text(size = 10) ) + theme( plot.title = element_text(hjust = 0.5), # Horizontal justification (in [0, 1]) # 因此居中就是0.5 plot.subtitle = element_text(hjust = 0.5), # Vertical justification (in [0, 1]) aspect.ratio = 3/4, # 限定纵横比,放入ppt )
scale_color_brewer非常好的美化函数。
for loop with labs
{r} for (i in unique(mtcars$cyl)){ p1 <- mtcars %>% filter(cyl == i) %>% ggplot(aes(x = mpg, y = disp)) + geom_point() + labs( title = paste("cyl = ", i) ) ggsave(paste(i,"pic.png",sep = "_"), device = "png") print(p1) }
ggsave(paste(i,"pic.png",sep = "_"), device = "png") 保存图片,方便复制粘贴。
@Rodriguesggplot2purrr 给出一个不用展示plot,但是输出的方法,很稳定,数据的处理都在nested table。
{r eval=F, message=FALSE, warning=FALSE} # devtools::install_github('amarjen/pwt9') # https://github.com/amarjen/pwt9 # data("pwt9") pwt9.0 <- read_csv("https://raw.githubusercontent.com/amarjen/pwt9/master/data-raw/pwt90.csv") country_list <- c("France", "Germany", "United States of America", "Luxembourg", "Switzerland", "Greece") small_pwt <- pwt9.0 %>% filter(country %in% country_list) small_pwt <- small_pwt %>% mutate(country = factor(country, levels = country_list, ordered = TRUE)) library(ggthemes)
```{r} plots <- small_pwt %>% group_by(country) %>% nest(.key = dat) %>% mutate(plot = map2(dat, country, ~ggplot(dat = .x) + theme_tufte() + geom_line(aes(y = avh, x = year)) + ggtitle(.y) + ylab(“Year”) + xlab(“Average annual hours worked by persons engaged”)))
plots$plot # map2( # file.path(“test_dropout_folder”,paste0(plots$country,".pdf")),
plots$plot,
ggsave)
## 配色交互
* [daattali/colourpicker: A colour picker tool for Shiny and for selecting colours in plots (in R)](https://github.com/daattali/colourpicker)
配色的福音啊

```
CPCOLS <- c("#7A67EE", "#4EEE94", "#e31a1c")
ggplot(iris, aes(Sepal.Length, Petal.Length)) +
geom_point(aes(col = Species)) +
scale_colour_manual(values = CPCOLS)
交互调整theme
- calligross/ggthemeassist: An RStudio addin for ggplot2 theme tweaking ggtheme的加强啊!!!卧槽!!! 都不需要背诵了!!! 今天真的是发现了宝!!
{r} library(ggThemeAssist)
{r} p1 <- mtcars %>% ggplot(aes(x = mpg, y = disp, col = as.factor(cyl))) + geom_point() p1 + theme(plot.subtitle = element_text(vjust = 1), plot.caption = element_text(colour = "darkblue", vjust = 1), axis.title = element_text(colour = "darkblue"), plot.title = element_text(face = "bold", colour = "darkblue"), legend.position = "none") +labs(title = "mpg, disp 和 cyl之间的关系", colour = "cyl", subtitle = "这是用ggplot Theme Assistant调整的,代码肯定很难看的,需要自己再删减一下的。", caption = "数据来源: mtcars") + theme_minimal() + theme(text=element_text(family="STKaiti"))
这个包是针对初学者的,实际上熟悉代码后,不需要这个的,太简单了。 除非是just这种非常小的调节差不多。
让图片的label加粗
例如geom_bar()中,加入fontface="bold"
quote
{r} mtcars %>% ggplot(aes(x = mpg, y = disp^2, col = cyl)) + geom_point() + labs(color = '') + # 不要legend的名字 scale_y_continuous(quote(disp ^ two)) + # 平方体现 theme(legend.position = "none")
ggplot 现在薄弱的环节是 ggtheme、 theme。 ggplot书上p.201。
How to annotate() ggplot with latex
latex2exp::TeX函数实现图中加latex,记得是\\号,目前不能解决斜体的问题[@Gegzna2016]。
{r eval=F} library(latex2exp) library(ggplot2) mtcars %>% ggplot(aes(x = mpg/disp,y=qsec)) + geom_point() + annotate("text", x = .4, y = 20, label = "paste(italic(R) ^ 2, \" = .75\")", parse = TRUE) + annotate("text", x = .2, y = 20, label = TeX("$\\alpha$"), parse = TRUE) + annotate("text", x = .1, y = 20, label = TeX("Formula: $\\frac{2hc^2}{\\lambda^\\beta}$"), parse = TRUE)
插入数学:公式 [@Slowikowski2018]
{r eval=F} d <- data.frame( x = c(1, 2, 2, 1.75, 1.25), y = c(1, 3, 1, 2.65, 1.25), math = c( NA, "integral(f(x) * dx, a, b)", NA, "lim(f(x), x %->% 0)", NA ) ) library(ggrepel) ggplot(d, aes(x, y, label = math)) + geom_point() + geom_label_repel( parse = TRUE, # Parse mathematical expressions. size = 8, box.padding = 2 )
annotate函数中的label参数表达绝对值符号
y == abs(x)
加入rectangle [@Evans2013]
annotate("rect", xmin=100, xmax=200, ymin=0, ymax=Inf, alpha=0.2, fill="red")
Combine
grid.arrange
{r} library(gridExtra) p1 <- mtcars %>% ggplot(aes(x= mpg, y = disp)) + geom_point() p2 <- mtcars %>% ggplot(aes(x= disp, y = mpg)) + geom_point() grid.arrange(p1,p2)
cowplot [Kassambara2016,pp.205-211]
```{r eval=F} library(cowplot) library(graphics) data(ToothGrowth)
p <- ggplot(ToothGrowth %>% mutate(dose = as.factor(dose)), aes(x = dose, y = len)) my3cols <- c("#E7B800", “#2E9FDF”, “#FC4E07”)
Box plot (bp)
bxp <- p + geom_boxplot(aes(color = dose)) + scale_color_manual(values = my3cols) bxp # Dot plot (dp) dp <- p + geom_dotplot(aes(color = dose, fill = dose) ,binaxis=‘y’, stackdir=‘center’) + scale_color_manual(values = my3cols) + scale_fill_manual(values = my3cols) dp lp <- economics %>% ggplot(aes(x = date, y = psavert)) + geom_line(color = “#E46726”) lp plot_grid(bxp, dp, lp ,labels = c(“Figure 1”, “Figure 2”, “Figure 3”) ,ncol = 2, nrow = 2) plot2by2 <- ggdraw() + draw_plot(bxp, x = 0, y = .5, width = .5, height = .5) + draw_plot(dp, x = .5, y = .5, width = .5, height = .5) + draw_plot(lp, x = 0, y = 0, width = 1, height = 0.5) + draw_plot_label(label = c(“Figure 1”, “Figure 2”, “Figure 3”) ,x = c(0, 0.5, 0), y = c(1, 1, 0.5), size = 15) plot2by2
使用`cowplot::plot_grid`函数进行合并。
`draw_plot_label`中的`size`描述label的大小。

如图,`x`和`y`的位置是图形的左下角作为坐标,`width`和`height`描述图片的形状。
```
save_plot(
"plot2by2.png",
plot2by2,
ncol = 2, # we're saving a grid plot of 2 columns
nrow = 2, # and 2 rows
# each individual subplot should have an aspect ratio of 1.3
base_aspect_ratio = 1.3
)
这样就可以保存pdf版本了,比较清晰。
{r eval=F} # use save_plot() instead of ggsave() when using cowplot save_plot( "mpg.png ,bxp ,base_aspect_ratio = 1.3 # make room for figure legend )
排序
因子变量排序
因此变量排序,不应该1和10放在一起,解决办法是,
fct_reorder(bin2, as.numeric(bin2)) 对于连续变量, 首先先转为fct,然后用as.numeric来排序。
for loop
循环跑ggplot
循环跑ggplot,其实就是套用for loop的参数就可以了。
for (i in ....) {
}
实现相关矩阵
{r message=FALSE, warning=FALSE, include=TRUE} library(tidyverse) library(reshape2) library(ggthemes)
```{r message=FALSE, warning=FALSE} cor_list <- function(x) { L <- M <- cor(x)
M[lower.tri(M, diag = TRUE)] <- NA M <- melt(M) names(M)[3] <- “points # lower.tri就是i比j大,而已。 L[upper.tri(L, diag = TRUE)] <- NA L <- melt(L) names(L)[3] <-“labels
merge(M, L) }
cor_list(iris[1:4]) # 这里的缺失值有三种 # 1. cor对角线上的 # 2. upper.tri中的一半 # 3. lower.tri中的一半
iris1 <- iris %>% group_by(Species) %>% do(cor_list(.[1:4])) # 这里相当于unnest了,比map函数方便。 iris1 %>% ggplot(aes(x = Var1, y = Var2)) + geom_point(aes(col = labels, size = abs(labels)), shape = 16) + geom_text(aes(x = Var2, y = Var1, # 这里要交叉一下, # 这样文字就在下三角了。 col = points, # size = abs(points), # size 不可以加,不然看不见 # hjust = 2, label = round(labels, 2))) + scale_size(range = c(0, 6)) + # 控制点的大小 scale_color_gradient2(“r”, limits = c(-1, 1)) + scale_y_discrete(””, limits = rev(levels(iris1$Var1))) + # rev控制了factor反着走,这样可以控制图像在上三角还是下三角 scale_x_discrete("") + guides(size = FALSE) + # 没什么用 geom_abline(slope = -1, intercept = nlevels(iris1$Var1) + 1) + coord_fixed() + facet_grid(. ~ Species) + # 不然图像重合了很难看。 labs( caption =“数据来源:iris”, subtitle = “建立相关性矩阵很简单\n抓好x和y轴变量和计算的相关系数就好”, title = “ggplot实现相关矩阵 ) + theme_tufte() + theme(text = element_text(family =“STKaiti”)) + # 为了显示中文 theme(axis.text.y = element_text(angle = 45, hjust = 1), axis.text.x = element_text(angle = 45, hjust = 1), strip.background = element_blank())
# 颜色盘
+ `scale_fill_manual()` for box plot, bar plot, violin plot, etc
+ `scale_color_manual()` for lines and points
# Box plot
bp + scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"))
# Scatter plot
sp + scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))
![颜色代码[@Chang2018;@常玉俊2017]](../../../picbackup/hextable.png)
以下以`#3366FF`作为例子,进行尝试。
```
mtcars %>%
ggplot() +
geom_point(aes(x = mpg, y = cyl),col = '#3366FF')
Calendar Plot [@戴启立2018]
```{r} library(openair) # load example data from package data(mydata)
basic plot
calendarPlot(mydata, pollutant = “o3”, year = 2003)
# 展示最优的方式
```
library(data.table)
best_score <- fread("Product,score1,score2,score3,score4
A,187.58,4.78,4.5,64.06
B,262,2.38,6,11.88
C,298.65,2.91,6,2.88
D,274.76,4.47,6,-6.42
E,293.48,2,5,61.12
")
market_level <- fread("MaxAve,Statistic,Aggregate
Maximum,score1,362.54
Average,score1,222.05
Maximum,score2,5.99
Average,score2,2.06
Maximum,score3,6
Average,score3,4.43
Maximum,score4,110.34
Average,score4,-7.72
")
这里注意,market_level表的设计保证了可以插入多条横线,带group就好。 [@Perry2018,Task 9]
{r message=FALSE, warning=FALSE, include=T} best_score %>% gather(Statistic,Aggregate,-Product) %>% ggplot(aes(x=Product,y=Aggregate,fill=Product))+ geom_bar(stat="identity") + facet_wrap (~Statistic, nrow = 2, ncol = 2, scales="free_y") + geom_hline(data=market_level, aes(yintercept=Aggregate, group=Statistic, color=MaxAve), size=1) + labs( title="Best five products ,color="Top or bottom ,caption="Jiaxiang Li - jiaxiangli.netlify.com") + scale_fill_hue(l=70) + scale_color_hue(l=20) + theme_minimal() + theme(axis.text.x=element_blank(), axis.title.x=element_blank(), axis.title.y=element_blank())
Web-friendly plot
{r} mtcars_p <- mtcars %>% ggplot(aes(mpg)) + geom_histogram() mtcars_p %>% plotly::ggplotly()
plotly::ggplotly可以提供交互的ggplot图 [@McVey2018 * Make a plot web-friendly | R]
制作博客背景图
受到 @Swanson2015 的启发 和 @Chisato2018 的R代码实现,为博客做了背景图。 相关的R包见 JiaxiangBU/add2blog。
<!-- wangxiaoer -> linyueqing -->
<!-- lixiaowu -> lijiaxiang -->
{r} matrix_name <- function(x,n=10) { set.seed(123) x %>% str_to_upper() %>% str_split('') %>% .[[1]] %>% sample(size=n*n,replace=T) %>% matrix( nrow = 10 ,byrow = T ) }
str_split切分字段参考 Stack Overflow
{r} library(lubridate) library(tidyverse) library(data.table) graph_fun <- function(matrix){ matrix %>% as.data.frame %>% rownames_to_column('y') %>% gather(x,text,-y) %>% mutate_at(vars(x,y),as.integer) %>% ggplot(aes(x=x, y=y, color=factor(text))) + geom_text(aes(label=text, color=factor(text)), family="Helvetica", size=2.5) + scale_y_reverse() + theme_void() + scale_color_viridis_d(option="magma", begin=0.2, guide="none") + scale_fill_viridis_d(option="magma", begin=0.2, guide="none") + theme(panel.background = element_rect(fill="#000000")) } lixiaowu_mid_data <- rbind( cbind(matrix_name('wangxiaoer'),matrix_name('wangxiaoer'),matrix_name('wangxiaoer')) ,cbind(matrix_name('wangxiaoer'),matrix_name('lixiaowu'),matrix_name('wangxiaoer')) ,cbind(matrix_name('wangxiaoer'),matrix_name('wangxiaoer'),matrix_name('wangxiaoer')) ) lixiaowu_mid_data %>% `colnames<-`(1:30) %>% `rownames<-`(1:30) %>% graph_fun() wangxiaoer_mid_data <- rbind( cbind(matrix_name('lixiaowu'),matrix_name('lixiaowu'),matrix_name('lixiaowu')) ,cbind(matrix_name('lixiaowu'),matrix_name('wangxiaoer'),matrix_name('lixiaowu')) ,cbind(matrix_name('lixiaowu'),matrix_name('lixiaowu'),matrix_name('lixiaowu')) ) wangxiaoer_mid_data %>% `colnames<-`(1:30) %>% `rownames<-`(1:30) %>% graph_fun()
```{r eval=F} ggsave(filename= paste0( now() %>% ymd_hms() %>% str_remove_all(’ |-|[A-z]|:’) ,’_blog_wechat_wallpic.png’
)
,width=11*golden_ratio,height=11)
ggsave(filename= paste0( now() %>% ymd_hms() %>% str_remove_all(’ |-|[A-z]|:’) ,’_blog_wechat_wallpic.png’
)
,width=11*golden_ratio,height=11)
```