本文于2026-03-28更新。 如发现问题或者有建议,欢迎提交 Issue
项目需求是在一群运动员中选出最佳的几位,并给出相应的数据支持。
- 数据支持方面,需要先给出几个指标供参考,再将这些指标聚合(属于无监督学习的思路),最后用一个综合得分来评价和选择运动员。
- 决策方面,数据支持最终要展示出来并指导决策,因此最后会使用 `ggplot2` 包展示选出的最佳运动员与其他运动员的比较,以此来说服需求方。
本文的数据、Code、思路参考 @Perry2018 。
思路具体如下,
- 创建四个指标
  - `TotalDistance`:总成绩
  - `StandardDev`:成绩数据的标准差,体现风险
  - `Success`:成功次数
  - `diff`:后三次和前三次的成绩差
- 匹配权重,这里可以用PCA等完成,但不是本文重点,因此直接赋值。
- 找出最好的五位选手,并以全体选手的 `max` 和 `mean` 水平作为参照,来体现这五位选手的优势,这个 idea 很棒。
清洗数据
## Error in `library()`:
## ! there is no package called 'tidyverse'
## Error in `read_csv()`:
## ! could not find function "read_csv"
# Keep only the women's javelin rows, then drop the two columns used for
# filtering (they hold a single value once the filter has been applied).
javelin <- select(
  filter(data, Male_Female == "Female" & Event == "Javelin"),
  -c(Male_Female, Event)
)
## Error in `data %>% filter(Male_Female == "Female", Event == "Javelin") %>% select(
## -Male_Female, -Event)`:
## ! could not find function "%>%"
## Error in `javelin %>% head()`:
## ! could not find function "%>%"
## Error in `javelin %>% summary()`:
## ! could not find function "%>%"
构建指标
# Reshape to long format: the columns Flight1 through Flight6 collapse into
# a `Flight` key column and a `Distance` value column.
javelin_long <- gather(
  javelin,
  key = "Flight",
  value = "Distance",
  Flight1:Flight6
)
## Error in `javelin %>% gather(Flight, Distance, Flight1:Flight6)`:
## ! could not find function "%>%"
# Turn the Flight label into a number: extract the first run of digits
# from each label and coerce it to numeric.
javelin_long <- mutate(
  javelin_long,
  Flight = as.numeric(str_extract(Flight, "[:digit:]{1,}"))
)
## Error in `javelin_long %>% mutate(Flight = str_extract(Flight, "[:digit:]{1,}")) %>%
## mutate(Flight = as.numeric(Flight))`:
## ! could not find function "%>%"
## Error in `javelin_long %>% head()`:
## ! could not find function "%>%"
# Per athlete and event, aggregate only the rows with Distance > 0:
#   TotalDistance - sum of those distances
#   StandardDev   - their standard deviation, rounded to 3 decimals
#   Success       - how many such rows there are
javelin_totals <- javelin_long %>%
  filter(Distance > 0) %>%
  group_by(Athlete, EventID) %>%
  summarise(
    TotalDistance = sum(Distance),
    StandardDev = round(sd(Distance), 3),
    Success = n()
  )
## Error in `javelin_long %>% filter(Distance > 0) %>% group_by(Athlete, EventID) %>%
## summarise(TotalDistance = sum(Distance), StandardDev = sd(Distance) %>% round(
## ., 3), Success = n())`:
## ! could not find function "%>%"
## Error:
## ! object 'javelin_totals' not found
# Compare the last three flights with the first three:
# `early` sums flights 1-3, `late` sums flights 4-6, `diff` is late - early.
javelin <- mutate(
  javelin,
  early = Flight1 + Flight2 + Flight3,
  # Alternative (disabled): late = Flight2 + Flight3 + Flight4
  late = Flight4 + Flight5 + Flight6,
  diff = late - early
)
## Error in `javelin %>% mutate(early = Flight1 + Flight2 + Flight3, late = Flight4 +
## Flight5 + Flight6) %>% mutate(diff = late - early)`:
## ! could not find function "%>%"
## Error in `javelin %>% tail(10)`:
## ! could not find function "%>%"
构建综合评分和选出选手
# Attach the early-vs-late difference to the per-athlete/event aggregates
# (matched on Athlete and EventID), then keep only the athlete identifier
# and the four metrics used for scoring.
javelin_totals <- select(
  left_join(javelin_totals, javelin, by = c("Athlete", "EventID")),
  Athlete, TotalDistance, StandardDev, Success, diff
)
## Error in `javelin_totals %>% left_join(javelin, by = c("Athlete", "EventID")) %>% select(
## Athlete, TotalDistance, StandardDev, Success, diff)`:
## ! could not find function "%>%"
## Error in `javelin_totals %>% head(10)`:
## ! could not find function "%>%"
# Min-max rescale a numeric vector to the range [0, 1].
#
# Arguments:
#   result  Numeric vector to rescale.
#   na.rm   If TRUE (default), missing values are ignored when finding the
#           minimum and maximum and stay NA in the output. If FALSE, any NA
#           in `result` makes the whole output NA.
#
# Returns a numeric vector of the same length as `result`. When all
# non-missing values are equal the range is zero; those positions are
# returned as 0 rather than the NaN that 0 / 0 would produce.
norm <- function(result, na.rm = TRUE) {
  # Nothing to rescale: empty input or no observed values at all.
  if (length(result) == 0L) {
    return(as.numeric(result))
  }
  if (all(is.na(result))) {
    return(rep(NA_real_, length(result)))
  }

  bounds <- range(result, na.rm = na.rm)
  span <- bounds[2] - bounds[1]

  # With na.rm = FALSE a single NA makes the range itself NA.
  if (is.na(span)) {
    return(rep(NA_real_, length(result)))
  }
  # Constant input: avoid dividing by zero.
  if (span == 0) {
    return(ifelse(is.na(result), NA_real_, 0))
  }

  (result - bounds[1]) / span
}
# Names of the metric columns that feed the composite score.
aggstats <- c("TotalDistance", "StandardDev", "Success", "diff")
# Apply norm() to each metric column across all rows, then average the
# rescaled values per athlete. across() / all_of() are used instead of
# mutate_at(vars(<character vector>)) and summarise_all(funs()), which
# dplyr and tidyselect have deprecated.
javelin_norm <- javelin_totals %>%
  ungroup() %>%
  mutate(across(all_of(aggstats), norm)) %>%
  group_by(Athlete) %>%
  summarise(across(everything(), mean))
## Error in `javelin_totals %>% ungroup() %>% mutate_at(vars(aggstats), norm) %>% group_by(
## Athlete) %>% summarise_all(funs(mean(.)))`:
## ! could not find function "%>%"
## Error:
## ! object 'javelin_norm' not found
# Weights applied to TotalDistance, StandardDev, Success and diff, in that
# order.
# NOTE(review): StandardDev carries a positive weight, so a larger spread
# raises the score -- confirm this is intended.
weights <- c(1, 2, 3, 4)
# Composite score = weighted sum of the per-athlete metrics; keep the five
# athletes with the highest score.
javelin_team <- javelin_norm %>%
  mutate(
    TotalScore = weights[1] * TotalDistance +
      weights[2] * StandardDev +
      weights[3] * Success +
      weights[4] * diff
  ) %>%
  select(Athlete, TotalScore) %>%
  arrange(desc(TotalScore)) %>%
  head(n = 5)
## Error in `javelin_norm %>% mutate(TotalScore = weights[1] * TotalDistance + weights[2] *
## StandardDev + weights[3] * Success + weights[4] * diff) %>% arrange(desc(
## TotalScore)) %>% select(Athlete, TotalScore) %>% head(5)`:
## ! could not find function "%>%"
## Error:
## ! object 'javelin_team' not found
构建可比水平
# Per-athlete means of the metrics, restricted to the athletes listed in
# javelin_team. The grouping by Athlete is stated explicitly so the result
# does not depend on whatever grouping javelin_totals carries, and across()
# is used in place of the deprecated summarise_all(funs()).
team_stats <- javelin_totals %>%
  filter(Athlete %in% javelin_team$Athlete) %>%
  group_by(Athlete) %>%
  summarise(across(everything(), mean))
## Error in `javelin_totals %>% filter(Athlete %in% javelin_team$Athlete) %>% summarise_all(
## funs(mean(.)))`:
## ! could not find function "%>%"
# Pool-wide reference levels: for every numeric column of javelin_totals,
# row 1 holds the maximum and row 2 the mean; non-numeric columns are
# skipped. Filter() + lapply() always produce a list, whereas sapply()
# would simplify to a matrix (which do.call() cannot take as an argument
# list) if every column happened to be numeric.
pool_stats <- data.frame(
  lapply(
    Filter(is.numeric, as.list(javelin_totals)),
    function(x) c(max(x), mean(x))
  )
)
## Error:
## ! object 'javelin_totals' not found
## Error:
## ! object 'pool_stats' not found
## Error in `pool_stats %>% gather(key = "Statistic", value = "Aggregate", -MaxAve)`:
## ! could not find function "%>%"
## Error:
## ! object 'team_stats' not found
## Error:
## ! object 'pool_stats' not found
展示
# Bar chart of the selected athletes' metrics, one facet per statistic
# (2 x 2 grid, independent y scales), with horizontal reference lines
# drawn from pool_stats and coloured by its MaxAve column.
p <- ggplot(
  gather(team_stats, Statistic, Aggregate, -Athlete),
  aes(x = Athlete, y = Aggregate, fill = Athlete)
) +
  geom_col() +
  facet_wrap(~Statistic, nrow = 2, ncol = 2, scales = "free_y") +
  geom_hline(
    data = pool_stats,
    aes(yintercept = Aggregate, group = Statistic, color = MaxAve),
    size = 1
  ) +
  labs(
    title = ".... Your Team Name....: Women's Javelin",
    color = "Athlete pool maximum / average"
  ) +
  scale_fill_hue(l = 70) +
  scale_color_hue(l = 20) +
  theme_minimal() +
  # Athlete names are already conveyed by the fill legend, so the x-axis
  # text and both axis titles are suppressed.
  theme(
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
## Error in `team_stats %>% gather(Statistic, Aggregate, -Athlete) %>% ggplot(aes(x = Athlete,
## y = Aggregate, fill = Athlete))`:
## ! could not find function "%>%"
## Error:
## ! object 'p' not found
模拟比赛结果
## Error:
## ! object 'javelin_totals' not found
## Error:
## ! object 'team_stats' not found
## Error:
## ! object 'javelin_totals' not found
## Error:
## ! object 'HomeTeam' not found
## Error:
## ! object 'HomeTeam' not found