畫圖~
ggplot2
需要的套件
library(ggplot2)
library(data.table)
library(dplyr)
library(reshape2)
Ben Chen
library(ggplot2)
library(data.table)
library(dplyr)
library(reshape2)
# Basic way to read the data: read.csv() with its default column types.
# Replace the path below with the real location of the CSV file.
ubike <- read.csv(file = "ubikeweatherutf8.csv")
# More advanced read: declare the type of every column through colClasses
# (21 entries, one per column, in file order) instead of letting read.csv()
# guess them.
ubike <- read.csv(
  file = "檔案路徑",
  colClasses = c(
    "factor", "integer", "integer", "factor", "factor", "numeric", "numeric",
    "integer", "numeric", "integer", "integer", "numeric", "numeric", "integer",
    "integer", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric"
  )
)
# Third variant: read with data.table's fread(). data.table = FALSE asks for
# a plain data.frame as the result; colClasses lists the type of each of the
# 21 columns in file order.
ubike <- fread(
  "檔案路徑",
  colClasses = c(
    "factor", "integer", "integer", "factor", "factor", "numeric", "numeric",
    "integer", "numeric", "integer", "integer", "numeric", "numeric", "integer",
    "integer", "numeric", "numeric", "numeric", "numeric", "numeric", "numeric"
  ),
  data.table = FALSE
)
# Give the 21 columns their Chinese display names (assigned by position).
names(ubike) <- c(
  "日期", "時間", "場站代號",              # date, time, station id
  "場站區域", "場站名稱",                  # district, station name
  "緯度", "經度", "總停車格",              # latitude, longitude, total docks
  "平均車輛數", "最大車輛數", "最小車輛數",  # mean / max / min bikes
  "車輛數標準差",                          # std. dev. of bikes
  "平均空位數", "最大空位數", "最小空位數",  # mean / max / min empty docks
  "空位數標準差",                          # std. dev. of empty docks
  "平均氣溫", "溼度", "氣壓",              # mean temperature, humidity, pressure
  "最大風速", "降雨量"                     # max wind speed, rainfall
)
# Shared theme that sets the font family and size of all plot text.
# STHeiti is a font that is only available on macOS.
thm <- theme(
  text = element_text(family = "STHeiti", size = 20)
)
# Histograms of the maximum wind speed column (最大風速).
# Computed statistics are referenced with after_stat(); the older `..name..`
# spelling is deprecated in current ggplot2.

# Bar height = number of observations in each bin.
ggplot(ubike) +
  geom_histogram(aes(x = 最大風速, y = after_stat(count))) +
  thm

# Bar height = density instead of raw count.
ggplot(ubike) +
  geom_histogram(aes(x = 最大風速, y = after_stat(density))) +
  thm

# Density on the y axis, with the fill colour mapped to the bin count.
ggplot(ubike) +
  geom_histogram(
    aes(x = 最大風速, y = after_stat(density), fill = after_stat(count))
  ) +
  thm

# Same histogram with a density curve drawn on top; x is set once in ggplot()
# so both layers share it.
ggplot(ubike, aes(x = 最大風速)) +
  geom_histogram(aes(y = after_stat(density), fill = after_stat(count))) +
  geom_density() +
  thm
# Keep rows whose date contains "2015-02" and whose district is 文山區, then
# compute the mean rainfall and mean humidity of each station.
x3 <- ubike %>%
  filter(
    grepl("2015-02", 日期, fixed = TRUE),
    場站區域 == "文山區"
  ) %>%
  group_by(場站名稱) %>%
  summarise(
    平均降雨量 = mean(降雨量),
    平均溼度 = mean(溼度)
  )
# Scatter plot of mean rainfall against mean humidity (one row of x3 per
# station).
ggplot(x3) +
  geom_point(
    aes(x = 平均溼度, y = 平均降雨量),
    size = 5 # size outside aes() sets one fixed point size
  ) +
  thm
# Same scatter plot, but colour and size are placed inside aes() so that they
# vary with the data: colour by station name, size by mean rainfall.
ggplot(x3) +
  geom_point(
    aes(
      x = 平均溼度,
      y = 平均降雨量,
      colour = 場站名稱,
      size = 平均降雨量
    )
  ) +
  scale_size(range = c(5, 10)) + # restrict the point sizes to 5..10
  thm
## N.Amer Europe Asia S.Amer Oceania
## 1951 45939 21574 2876 1815 1646
## 1956 60423 29990 4708 2568 2366
## 1957 64721 32510 5230 2695 2526
## 1958 68484 35218 6662 2845 2691
## 1959 71799 37598 6856 3000 2868
## 1960 76036 40341 8220 3145 3054
## 1961 79831 43173 9053 3338 3224
## Africa Mid.Amer
## 1951 89 555
## 1956 1411 733
## 1957 1546 773
## 1958 1663 836
## 1959 1769 911
## 1960 1905 1008
## 1961 2005 1076
ggplot(WorldPhones,aes(x=?????,y=Asia))......
# WorldPhones is a matrix whose years live in the row names, so there is no
# column to map to x. Convert it to a data frame and copy the row names into
# a `year` column.
class(WorldPhones)
## [1] "matrix"
WP.df <- as.data.frame(WorldPhones)
WP.df[["year"]] <- rownames(WP.df)
class(WP.df)
## [1] "data.frame"
# First attempt at a line chart. `year` is still a character column at this
# point, which is what triggers the geom_path message shown after this call.
ggplot(WP.df, aes(x = year, y = Asia)) +
  geom_line()
## geom_path: Each group consist of only one observation. Do you need to adjust the group aesthetic?
# Inspect the column types: every region is numeric, but `year` is character.
str(WP.df)
## 'data.frame': 7 obs. of 8 variables:
## $ N.Amer : num 45939 60423 64721 68484 71799 ...
## $ Europe : num 21574 29990 32510 35218 37598 ...
## $ Asia : num 2876 4708 5230 6662 6856 ...
## $ S.Amer : num 1815 2568 2695 2845 3000 ...
## $ Oceania : num 1646 2366 2526 2691 2868 ...
## $ Africa : num 89 1411 1546 1663 1769 ...
## $ Mid.Amer: num 555 733 773 836 911 ...
## $ year : chr "1951" "1956" "1957" "1958" ...
# Convert `year` to numeric so it can be used as a continuous x axis.
WP.df[["year"]] <- as.numeric(WP.df[["year"]])
# Line chart of the Asia column against year.
ggplot(WP.df, aes(x = year, y = Asia)) +
  geom_line() +
  thm

# Thicker line with a marker on every observation.
# In current ggplot2 the width of a line is set with `linewidth`; `size` is
# kept for the size of points (using `size` on lines is deprecated).
ggplot(WP.df, aes(x = year, y = Asia)) +
  geom_line(linewidth = 2) +
  geom_point(size = 5) +
  thm
| | N.Amer | Europe | Asia | S.Amer | Oceania | Africa | Mid.Amer | year |
---|---|---|---|---|---|---|---|---|
1951 | 45939.00 | 21574.00 | 2876.00 | 1815.00 | 1646.00 | 89.00 | 555.00 | 1951.00 |
1956 | 60423.00 | 29990.00 | 4708.00 | 2568.00 | 2366.00 | 1411.00 | 733.00 | 1956.00 |
1957 | 64721.00 | 32510.00 | 5230.00 | 2695.00 | 2526.00 | 1546.00 | 773.00 | 1957.00 |
1958 | 68484.00 | 35218.00 | 6662.00 | 2845.00 | 2691.00 | 1663.00 | 836.00 | 1958.00 |
1959 | 71799.00 | 37598.00 | 6856.00 | 3000.00 | 2868.00 | 1769.00 | 911.00 | 1959.00 |
1960 | 76036.00 | 40341.00 | 8220.00 | 3145.00 | 3054.00 | 1905.00 | 1008.00 | 1960.00 |
1961 | 79831.00 | 43173.00 | 9053.00 | 3338.00 | 3224.00 | 2005.00 | 1076.00 | 1961.00 |
\[\Downarrow\]
library(reshape2)
# Reshape from wide to long. `year` is kept as the identifier column; every
# other column is stacked into a name/value pair. The argument is spelled
# out as id.vars (rather than the partially matched `id`), and the output
# columns are named directly so the result has columns year, area, number.
WP.long <- melt(
  WP.df,
  id.vars = "year",
  variable.name = "area",
  value.name = "number"
)
| | year | area | number |
---|---|---|---|
1 | 1951.00 | N.Amer | 45939.00 |
2 | 1956.00 | N.Amer | 60423.00 |
3 | 1957.00 | N.Amer | 64721.00 |
4 | 1958.00 | N.Amer | 68484.00 |
5 | 1959.00 | N.Amer | 71799.00 |
6 | 1960.00 | N.Amer | 76036.00 |
7 | 1961.00 | N.Amer | 79831.00 |
8 | 1951.00 | Europe | 21574.00 |
9 | 1956.00 | Europe | 29990.00 |
10 | 1957.00 | Europe | 32510.00 |
11 | 1958.00 | Europe | 35218.00 |
12 | 1959.00 | Europe | 37598.00 |
13 | 1960.00 | Europe | 40341.00 |
14 | 1961.00 | Europe | 43173.00 |
15 | 1951.00 | Asia | 2876.00 |
16 | 1956.00 | Asia | 4708.00 |
17 | 1957.00 | Asia | 5230.00 |
18 | 1958.00 | Asia | 6662.00 |
19 | 1959.00 | Asia | 6856.00 |
20 | 1960.00 | Asia | 8220.00 |
21 | 1961.00 | Asia | 9053.00 |
22 | 1951.00 | S.Amer | 1815.00 |
23 | 1956.00 | S.Amer | 2568.00 |
24 | 1957.00 | S.Amer | 2695.00 |
25 | 1958.00 | S.Amer | 2845.00 |
26 | 1959.00 | S.Amer | 3000.00 |
27 | 1960.00 | S.Amer | 3145.00 |
28 | 1961.00 | S.Amer | 3338.00 |
29 | 1951.00 | Oceania | 1646.00 |
30 | 1956.00 | Oceania | 2366.00 |
31 | 1957.00 | Oceania | 2526.00 |
32 | 1958.00 | Oceania | 2691.00 |
33 | 1959.00 | Oceania | 2868.00 |
34 | 1960.00 | Oceania | 3054.00 |
35 | 1961.00 | Oceania | 3224.00 |
36 | 1951.00 | Africa | 89.00 |
37 | 1956.00 | Africa | 1411.00 |
38 | 1957.00 | Africa | 1546.00 |
39 | 1958.00 | Africa | 1663.00 |
40 | 1959.00 | Africa | 1769.00 |
41 | 1960.00 | Africa | 1905.00 |
42 | 1961.00 | Africa | 2005.00 |
43 | 1951.00 | Mid.Amer | 555.00 |
44 | 1956.00 | Mid.Amer | 733.00 |
45 | 1957.00 | Mid.Amer | 773.00 |
46 | 1958.00 | Mid.Amer | 836.00 |
47 | 1959.00 | Mid.Amer | 911.00 |
48 | 1960.00 | Mid.Amer | 1008.00 |
49 | 1961.00 | Mid.Amer | 1076.00 |
# One line per region: `group = area` splits the data into separate lines
# and `colour = area` gives each region its own colour.
# Line width uses `linewidth` (the `size` spelling is deprecated for lines);
# `size` still controls the point markers.
ggplot(WP.long, aes(x = year, y = number, group = area, colour = area)) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 5) +
  thm
# Read the pixnet data, keeping text columns as character vectors.
pixnet <- read.csv("train.csv", stringsAsFactors = FALSE)

# Bar chart of the number of rows per referrer_venue.
# geom_bar() is called with its default stat, which does the counting.
# An explicit stat = 'bin' is rejected by ggplot2 >= 2.0 when x is discrete
# (the error message suggests stat = "count"), whereas the default works on
# both old and new versions.
ggplot(pixnet, aes(x = referrer_venue)) +
  geom_bar() +
  thm
# For rows in district 中和區 with 時間 == 8: flag each row as rainy
# (降雨量 > 1, labelled 雨天) or not (labelled 晴天), then average the ratio
# of empty docks to total docks for each station and weather label.
ub2 <- ubike %>%
  filter(場站區域 == '中和區', 時間 == 8) %>%
  mutate(
    is.rain = factor(
      降雨量 > 1,
      levels = c(FALSE, TRUE),
      labels = c("晴天", "雨天")
    )
  ) %>%
  select(日期, 平均空位數, 場站名稱, is.rain, 總停車格) %>%
  group_by(場站名稱, is.rain) %>%
  summarise(use_rate = mean(平均空位數 / 總停車格))
# Show the first rows of the summary.
head(ub2)
## Source: local data frame [6 x 3]
## Groups: 場站名稱 [3]
##
## 場站名稱 is.rain use_rate
## (fctr) (fctr) (dbl)
## 1 捷運永安市場站 晴天 0.6671052
## 2 捷運永安市場站 雨天 0.6483044
## 3 秀山國小 晴天 0.4966519
## 4 秀山國小 雨天 0.4436588
## 5 中和公園 晴天 0.6363115
## 6 中和公園 雨天 0.5917228
# Theme that controls the direction of the x-axis labels (rotated 90 degrees,
# hjust = 1) and sets the same 20-point STHeiti text as elsewhere.
las2 <- theme(
  text = element_text(size = 20, family = "STHeiti"),
  axis.text.x = element_text(angle = 90, hjust = 1)
)
# stat = 'identity' uses the values already in the table as the bar heights.
ggplot(ub2, aes(x = 場站名稱, y = use_rate, fill = is.rain)) +
  geom_bar(stat = 'identity') +
  las2

# position = 'dodge' places the categories side by side.
ggplot(ub2, aes(x = 場站名稱, y = use_rate, fill = is.rain)) +
  geom_bar(stat = 'identity', position = 'dodge') +
  las2
# table() counts the rows in each referrer_venue category; data.frame() turns
# the result into a two-column table, renamed to 入口網站 (category) and
# 數量 (count).
pix <- data.frame(table(pixnet$referrer_venue))
names(pix) <- c('入口網站', '數量')
# Add the count of row 1 to row 5, then remove row 1.
# NOTE(review): the row positions are hard-coded and depend on the category
# order produced from train.csv — presumably row 1 is a category that should
# be merged into row 5; confirm against the data.
pix[5, "數量"] <- pix[5, "數量"] + pix[1, "數量"]
pix <- pix[-1, ]
# Pie chart with percentage labels: a single stacked bar (x = "") wrapped
# onto polar coordinates along y.
ggplot(pix, aes(x = "", y = 數量, fill = 入口網站)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y") +
  # Put each label in the middle of its own slice. position_stack() applies
  # the same stacking as the bars, so labels stay aligned with the slices
  # whatever order ggplot2 stacks them in; hand-computed cumulative sums do
  # not, because the stacking order was reversed in ggplot2 2.2.0.
  geom_text(
    aes(label = paste0(round(數量 / sum(數量), 3) * 100, "%")),
    position = position_stack(vjust = 0.5),
    size = 7
  ) +
  # Remove the axis title, axis text and grid, which are meaningless on a pie.
  theme(
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    panel.grid = element_blank(),
    text = element_text(size = 20, family = "STHeiti")
  )
# Recap: line chart with the aesthetics given inside the geom.
ggplot(data = WP.df) +
  geom_line(aes(x = year, y = Asia))

# Recap: thicker line plus point markers. Line width is set with `linewidth`
# (`size` is deprecated for lines in current ggplot2); `size` sets the points.
ggplot(WP.df, aes(x = year, y = Asia)) +
  geom_line(linewidth = 2) +
  geom_point(size = 5)
# Recap: scatter plot with colour mapped to station name and point size
# mapped to mean rainfall, sizes limited to the range 5..10.
ggplot(x3) +
  geom_point(
    aes(
      x = 平均溼度,
      y = 平均降雨量,
      colour = 場站名稱,
      size = 平均降雨量
    )
  ) +
  scale_size(range = c(5, 10)) +
  thm
# Recap: scatter plot of the `pressure` data with a smoothed trend added by
# stat_smooth() using its default settings.
ggplot(
  pressure,
  aes(x = temperature, y = pressure)
) +
  geom_point() +
  stat_smooth()
# Recap: a single bar (x = "") split by category, with heights taken from
# the 數量 column.
ggplot(pix, aes(x = "", y = 數量, fill = 入口網站)) +
  geom_bar(stat = 'identity') +
  thm

# Recap: the same bar at full width, wrapped onto polar coordinates along y
# to form a pie chart.
ggplot(pix, aes(x = "", y = 數量, fill = 入口網站)) +
  geom_bar(stat = 'identity', width = 1) +
  coord_polar('y') +
  thm
# Keep rows whose date contains "2015-02" and whose district is 中和區, then
# compute the mean rainfall for every date and station combination.
rain <- ubike %>%
  filter(
    grepl("2015-02", 日期, fixed = TRUE),
    場站區域 == "中和區"
  ) %>%
  group_by(日期, 場站名稱) %>%
  summarise(每日平均降雨量 = mean(降雨量))
# Mean rainfall by date, one coloured line per station.
# Line width is set with `linewidth`; `size` is deprecated for lines in
# current ggplot2.
ggplot(rain) +
  geom_line(
    aes(x = 日期, y = 每日平均降雨量, group = 場站名稱, colour = 場站名稱),
    linewidth = 2
  ) +
  thm +
  las2

# facet_wrap() draws each station in its own panel (arranged in two rows).
ggplot(rain) +
  geom_line(
    aes(x = 日期, y = 每日平均降雨量, group = 場站名稱, colour = 場站名稱),
    linewidth = 2
  ) +
  facet_wrap(~場站名稱, nrow = 2) +
  thm +
  las2
# After drawing a plot, save it to a file. The string below is a placeholder:
# replace it with a real file name. ggsave() chooses the output format from
# the file extension, so the real name should include one (for example .png).
ggsave(filename = '檔案名稱')
sos
套件,請見Demo