# Packages this script depends on. The original also tried to install and
# attach "parkcircles", a misspelling of packcircles; library() on a package
# that is not installed stops the script, so only the real name is listed.
required_packages <- c("readxl", "dplyr", "ggplot2", "packcircles", "viridis")

# Install only what is missing instead of reinstalling on every run.
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

library(readxl)
library(dplyr)
library(ggplot2)
library(packcircles)
library(viridis)
# Load the spreadsheet; `google` keeps the untouched data and `cp_google` is
# the working copy that gets cleaned.
google <- read_excel("googleplaystore1.xlsx")
cp_google <- google

# Turn placeholder values into real NAs so na.omit() can drop those rows.
cp_google$Size[cp_google$Size %in% "Varies with device"] <- NA
cp_google$Rating[cp_google$Rating %in% "NaN"] <- NA
cp_google$`Current Ver`[cp_google$`Current Ver` %in% "Varies with device"] <- NA

# Review counts that cannot be parsed become NA (with a coercion warning).
cp_google$Reviews <- as.numeric(cp_google$Reviews)

# Drop every row that has an NA in any column.
cp_google <- na.omit(cp_google)

# Strip the "M" suffix from the size labels; Size stays a character column.
cp_google$Size <- gsub("M", "", cp_google$Size)

# Installs is intentionally left as raw text. The original called
# gsub("+", "", ...) here, but a bare "+" is a quantifier rather than a valid
# pattern, and the conversion further down matches the labels including
# their trailing "+".

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(cp_google)
}
# Number of apps per category, most frequent first. The original first built
# the same table from the uncleaned `google` data and overwrote it on the
# next statement; only the version based on the cleaned data is kept.
category <- cp_google %>%
  filter(!is.na(Category)) %>%
  group_by(Category) %>%
  summarise(n = n()) %>%
  arrange(desc(n))

# 1. Chart data: one row per category with its app count.
data <- data.frame(group = category$Category, value = category$n)

# 2. Build the circle layout; sizetype selects whether each value is used as
#    the circle's area or its radius.
packing <- circleProgressiveLayout(data$value, sizetype = "area")
data <- cbind(data, packing)
dat.gg <- circleLayoutVertices(packing, npoints = 50)

# 3. Draw the circles, colour them and label each with its category.
ggplot() +
  geom_polygon(
    data = dat.gg,
    aes(x, y, group = id, fill = as.factor(id)),
    colour = "black",
    alpha = 0.6
  ) +
  scale_fill_manual(values = magma(nrow(data))) +
  geom_text(data = data, aes(x, y, size = value, label = group)) +
  scale_size_continuous(range = c(1, 4)) +
  theme_void() +
  theme(legend.position = "none") +
  coord_equal()
# Mean install count per category and app type, largest first.
# At this point Installs still holds text labels such as "1,000+", so the
# original mean(Installs) returned NA for every group. The "+" and the
# thousands separators are stripped before averaging. The original also
# repeated this summary four times (once on the uncleaned `google` data);
# each copy overwrote the previous one, so it is computed once here.
Category_install <- cp_google %>%
  filter(!is.na(Installs) & !is.na(Category) & !is.na(Type)) %>%
  group_by(Category, Type) %>%
  summarise(mean_install = mean(as.numeric(gsub("[+,]", "", Installs)))) %>%
  ungroup() %>%
  arrange(desc(mean_install))

# Horizontal bar chart of the mean install counts. Columns are referenced by
# bare name inside aes(); ggplot2 discourages `df$col` there.
result <- ggplot(
  data = Category_install,
  aes(x = reorder(Category, -mean_install), y = mean_install)
) +
  geom_col() +
  coord_flip()
result + labs(x = "장르", y = "다운로드", title = "장르별 다운로드 수")
# Mean review count per category and app type, largest first.
Category_review <- cp_google %>%
  filter(!is.na(Category) & !is.na(Type) & !is.na(Reviews)) %>%
  group_by(Category, Type) %>%
  summarise(mean_review = mean(Reviews)) %>%
  ungroup() %>%
  arrange(desc(mean_review))

# Horizontal bar chart of the mean review counts. Columns are referenced by
# bare name inside aes(); ggplot2 discourages `df$col` there.
result1 <- ggplot(
  data = Category_review,
  aes(x = reorder(Category, -mean_review), y = mean_review)
) +
  geom_col() +
  coord_flip()
# The title originally contained a duplicated syllable; corrected here.
result1 + labs(x = "장르", y = "리뷰 수", title = "장르별 리뷰 수")
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(cp_google)
}

# Numeric value for every install label the original nested ifelse() chain
# recognised. The last three branches of that chain compared against
# `google$Installs`, which has more rows than the filtered `cp_google`, so
# the largest buckets were matched against the wrong rows. A single lookup
# on cp_google's own column avoids that; unknown labels still become NA.
install_buckets <- c(
  "1+" = 1, "5+" = 5, "10+" = 10, "50+" = 50, "100+" = 100, "500+" = 500,
  "1,000+" = 1000, "5,000+" = 5000, "10,000+" = 10000, "50,000+" = 50000,
  "100,000+" = 100000, "500,000+" = 500000,
  "1,000,000+" = 1000000, "5,000,000+" = 5000000,
  "10,000,000+" = 10000000, "50,000,000+" = 50000000,
  "100,000,000+" = 100000000, "500,000,000+" = 500000000,
  "1,000,000,000+" = 1000000000
)
cp_google$Installs <- unname(
  install_buckets[match(cp_google$Installs, names(install_buckets))]
)

# Treat the literal "NaN" version string as missing, then drop incomplete rows
# (including rows whose install label was not recognised).
cp_google$`Current Ver`[cp_google$`Current Ver` %in% "NaN"] <- NA
cp_google <- na.omit(cp_google)
# Mean install count per category and app type, largest first.
Category_install <- cp_google %>%
  filter(!is.na(Installs) & !is.na(Category) & !is.na(Type)) %>%
  group_by(Category, Type) %>%
  summarise(mean_install = mean(Installs)) %>%
  ungroup() %>%
  arrange(desc(mean_install))

# Horizontal bar chart of the mean install counts. Columns are referenced by
# bare name inside aes(); ggplot2 discourages `df$col` there.
result <- ggplot(
  data = Category_install,
  aes(x = reorder(Category, -mean_install), y = mean_install)
) +
  geom_col() +
  coord_flip()
result + labs(x = "장르", y = "다운로드", title = "장르별 다운로드 수")

# Show the review chart built earlier again (title typo corrected: the
# original had a duplicated syllable).
result1 + labs(x = "장르", y = "리뷰 수", title = "장르별 리뷰 수")
# Uses Category_install. Colour key per row: "type1" for paid apps,
# "type2" for everything else.
mycolor <- ifelse(Category_install$Type == "Paid", "type1", "type2")

# One vertical segment per category/type row, from y = 1 up to the mean
# install count. The original plot-level aes(x = Type, y = mean_install) was
# fully overridden by the layer mapping, so it is dropped; columns are
# referenced by bare name instead of `Category_install$...`.
ggplot(data = Category_install) +
  geom_segment(
    aes(
      x = Category, xend = Category,
      y = 1, yend = mean_install,
      color = mycolor
    ),
    size = 1,
    alpha = 0.9
  ) +
  theme_light() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none",
    panel.border = element_blank()
  ) +
  xlab("") +
  ylab("Value of Y")
# Convert Size to a number before averaging. The original summarised the
# character column several times (yielding NA means), then deleted the "k"
# suffix so that kilobyte values were read as if they were megabytes.
# NOTE(review): assumes a trailing k/K means kilobytes and that values
# without a suffix (the "M" was stripped earlier) are megabytes; confirm
# against the data source.
size_text <- as.character(cp_google$Size)
size_is_kb <- grepl("[kK]$", size_text)
size_value <- as.numeric(sub("[kKM]$", "", size_text))
size_value[size_is_kb] <- size_value[size_is_kb] / 1024
cp_google$Size <- size_value
stopifnot(is.numeric(cp_google$Size))

if (interactive()) {
  View(cp_google)
}

# Mean install count and mean size per category, largest mean size first.
Category_install_size <- cp_google %>%
  filter(!is.na(Size) & !is.na(Category) & !is.na(Installs)) %>%
  group_by(Category) %>%
  summarise(mean_install = mean(Installs), mean_size = mean(Size)) %>%
  arrange(desc(mean_size))

# Horizontal bars of mean size, with categories ordered by mean installs.
result3 <- ggplot(
  data = Category_install_size,
  aes(x = reorder(Category, mean_install), y = mean_size)
) +
  geom_col() +
  coord_flip()
result3 + labs(x = "Genre", y = "Size And Install", title = "Size by genre")
# Start again from the untouched data.
cp_google <- google

# Turn placeholder values into real NAs so na.omit() can drop those rows.
cp_google$Size[cp_google$Size %in% "Varies with device"] <- NA
cp_google$Rating[cp_google$Rating %in% "NaN"] <- NA
cp_google$`Current Ver`[
  cp_google$`Current Ver` %in% c("Varies with device", "NaN")
] <- NA

# Review counts that cannot be parsed become NA (with a coercion warning).
cp_google$Reviews <- as.numeric(cp_google$Reviews)

# Numeric value for every install label the original nested ifelse() chain
# recognised. The last three branches of that chain compared against
# `google$Installs`, which has more rows than the already filtered
# `cp_google`, so the largest buckets were matched against the wrong rows.
# Unknown labels still become NA.
install_buckets <- c(
  "1+" = 1, "5+" = 5, "10+" = 10, "50+" = 50, "100+" = 100, "500+" = 500,
  "1,000+" = 1000, "5,000+" = 5000, "10,000+" = 10000, "50,000+" = 50000,
  "100,000+" = 100000, "500,000+" = 500000,
  "1,000,000+" = 1000000, "5,000,000+" = 5000000,
  "10,000,000+" = 10000000, "50,000,000+" = 50000000,
  "100,000,000+" = 100000000, "500,000,000+" = 500000000,
  "1,000,000,000+" = 1000000000
)
cp_google$Installs <- unname(
  install_buckets[match(cp_google$Installs, names(install_buckets))]
)

# Drop every row that has an NA in any column.
cp_google <- na.omit(cp_google)

# Strip the "M" suffix from the size labels; Size stays a character column.
cp_google$Size <- gsub("M", "", cp_google$Size)
# Count the cleaned apps in each category and rank categories by that count.
category <- cp_google %>%
  filter(!is.na(Category)) %>%
  count(Category, name = "n") %>%
  arrange(desc(n))

# Step 1: chart data, one row per category with its app count.
data <- with(category, data.frame(group = Category, value = n))

# Step 2: circle layout; sizetype selects whether each value is used as the
# circle's area or its radius. The polygon outlines are derived from the
# layout, which is also appended to the chart data for label placement.
packing <- circleProgressiveLayout(data[["value"]], sizetype = "area")
dat.gg <- circleLayoutVertices(packing, npoints = 50)
data <- cbind(data, packing)

# Step 3: draw the circles, colour them and label each with its category.
ggplot() +
  geom_polygon(
    data = dat.gg,
    mapping = aes(x = x, y = y, group = id, fill = as.factor(id)),
    colour = "black",
    alpha = 0.6
  ) +
  geom_text(
    data = data,
    mapping = aes(x = x, y = y, size = value, label = group)
  ) +
  scale_fill_manual(values = magma(nrow(data))) +
  scale_size_continuous(range = c(1, 4)) +
  coord_equal() +
  theme_void() +
  theme(legend.position = "none")
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(cp_google)
}

# Mean install count per category and app type, largest first.
Category_install <- cp_google %>%
  filter(!is.na(Installs) & !is.na(Category) & !is.na(Type)) %>%
  group_by(Category, Type) %>%
  summarise(mean_install = mean(Installs)) %>%
  ungroup() %>%
  arrange(desc(mean_install))

# Horizontal bar chart of the mean install counts. Columns are referenced by
# bare name inside aes(); ggplot2 discourages `df$col` there.
result <- ggplot(
  data = Category_install,
  aes(x = reorder(Category, -mean_install), y = mean_install)
) +
  geom_col() +
  coord_flip()
result + labs(x = "장르", y = "다운로드", title = "장르별 다운로드 수")
# Mean review count per category and app type, largest first.
Category_review <- cp_google %>%
  filter(!is.na(Category) & !is.na(Type) & !is.na(Reviews)) %>%
  group_by(Category, Type) %>%
  summarise(mean_review = mean(Reviews)) %>%
  ungroup() %>%
  arrange(desc(mean_review))

# Horizontal bar chart of the mean review counts. Columns are referenced by
# bare name inside aes(); ggplot2 discourages `df$col` there.
result1 <- ggplot(
  data = Category_review,
  aes(x = reorder(Category, -mean_review), y = mean_review)
) +
  geom_col() +
  coord_flip()
# The title originally contained a duplicated syllable; corrected here.
result1 + labs(x = "장르", y = "리뷰 수", title = "장르별 리뷰 수")
# Uses Category_install. Colour key per row: "type1" for paid apps,
# "type2" for everything else.
mycolor <- ifelse(Category_install$Type == "Paid", "type1", "type2")

# One vertical segment per category/type row, from y = 1 up to the mean
# install count. The original plot-level aes(x = Type, y = mean_install) was
# fully overridden by the layer mapping, so it is dropped; columns are
# referenced by bare name instead of `Category_install$...`.
ggplot(data = Category_install) +
  geom_segment(
    aes(
      x = Category, xend = Category,
      y = 1, yend = mean_install,
      color = mycolor
    ),
    size = 1,
    alpha = 0.9
  ) +
  theme_light() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none",
    panel.border = element_blank()
  ) +
  xlab("") +
  ylab("Value of Y")
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(cp_google)
}

# Convert Size to a number. A plain as.numeric() turned every value that
# still carried a "k" suffix into NA.
# NOTE(review): assumes a trailing k/K means kilobytes and that values
# without a suffix (the "M" was stripped earlier) are megabytes; confirm
# against the data source.
size_text <- as.character(cp_google$Size)
size_is_kb <- grepl("[kK]$", size_text)
size_value <- as.numeric(sub("[kKM]$", "", size_text))
size_value[size_is_kb] <- size_value[size_is_kb] / 1024
cp_google$Size <- size_value
stopifnot(is.numeric(cp_google$Size))
# Reload the spreadsheet; `google` keeps the untouched data and `cp_google`
# is the working copy that gets cleaned.
google <- read_excel("googleplaystore1.xlsx")
cp_google <- google

# Turn placeholder values into real NAs so na.omit() can drop those rows.
cp_google$Size[cp_google$Size %in% "Varies with device"] <- NA
cp_google$Rating[cp_google$Rating %in% "NaN"] <- NA
cp_google$`Current Ver`[
  cp_google$`Current Ver` %in% c("Varies with device", "NaN")
] <- NA

# Review counts that cannot be parsed become NA (with a coercion warning).
cp_google$Reviews <- as.numeric(cp_google$Reviews)

# Numeric value for every install label the original nested ifelse() chain
# recognised. The last three branches of that chain compared against
# `google$Installs`, which has more rows than the already filtered
# `cp_google`, so the largest buckets were matched against the wrong rows.
# Unknown labels still become NA.
install_buckets <- c(
  "1+" = 1, "5+" = 5, "10+" = 10, "50+" = 50, "100+" = 100, "500+" = 500,
  "1,000+" = 1000, "5,000+" = 5000, "10,000+" = 10000, "50,000+" = 50000,
  "100,000+" = 100000, "500,000+" = 500000,
  "1,000,000+" = 1000000, "5,000,000+" = 5000000,
  "10,000,000+" = 10000000, "50,000,000+" = 50000000,
  "100,000,000+" = 100000000, "500,000,000+" = 500000000,
  "1,000,000,000+" = 1000000000
)
cp_google$Installs <- unname(
  install_buckets[match(cp_google$Installs, names(install_buckets))]
)

# Drop every row that has an NA in any column.
cp_google <- na.omit(cp_google)

# Strip the "M" suffix from the size labels; Size stays a character column.
cp_google$Size <- gsub("M", "", cp_google$Size)
# Tally the cleaned apps by category, most common category first.
category <- cp_google %>%
  filter(!is.na(Category)) %>%
  count(Category, name = "n") %>%
  arrange(desc(n))

# (1) Chart data: category name and app count side by side.
data <- with(category, data.frame(group = Category, value = n))

# (2) Circle layout; sizetype selects whether each value is used as the
#     circle's area or its radius. Outline vertices come from the layout,
#     and the layout columns are attached to the chart data for the labels.
packing <- circleProgressiveLayout(data[["value"]], sizetype = "area")
dat.gg <- circleLayoutVertices(packing, npoints = 50)
data <- cbind(data, packing)

# (3) Render: filled circles first, then the category labels on top.
ggplot() +
  geom_polygon(
    data = dat.gg,
    mapping = aes(x = x, y = y, group = id, fill = as.factor(id)),
    colour = "black",
    alpha = 0.6
  ) +
  geom_text(
    data = data,
    mapping = aes(x = x, y = y, size = value, label = group)
  ) +
  scale_fill_manual(values = magma(nrow(data))) +
  scale_size_continuous(range = c(1, 4)) +
  coord_equal() +
  theme_void() +
  theme(legend.position = "none")
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(cp_google)
}

# Mean install count per category and app type, largest first.
Category_install <- cp_google %>%
  filter(!is.na(Installs) & !is.na(Category) & !is.na(Type)) %>%
  group_by(Category, Type) %>%
  summarise(mean_install = mean(Installs)) %>%
  ungroup() %>%
  arrange(desc(mean_install))

# Horizontal bar chart of the mean install counts. Columns are referenced by
# bare name inside aes(); ggplot2 discourages `df$col` there.
result <- ggplot(
  data = Category_install,
  aes(x = reorder(Category, -mean_install), y = mean_install)
) +
  geom_col() +
  coord_flip()
result + labs(x = "장르", y = "다운로드", title = "장르별 다운로드 수")
# Mean review count per category and app type, largest first.
Category_review <- cp_google %>%
  filter(!is.na(Category) & !is.na(Type) & !is.na(Reviews)) %>%
  group_by(Category, Type) %>%
  summarise(mean_review = mean(Reviews)) %>%
  ungroup() %>%
  arrange(desc(mean_review))

# Horizontal bar chart of the mean review counts. Columns are referenced by
# bare name inside aes(); ggplot2 discourages `df$col` there.
result1 <- ggplot(
  data = Category_review,
  aes(x = reorder(Category, -mean_review), y = mean_review)
) +
  geom_col() +
  coord_flip()
# The title originally contained a duplicated syllable; corrected here.
result1 + labs(x = "장르", y = "리뷰 수", title = "장르별 리뷰 수")
# Uses Category_install. Colour key per row: "type1" for paid apps,
# "type2" for everything else.
mycolor <- ifelse(Category_install$Type == "Paid", "type1", "type2")

# One vertical segment per category/type row, from y = 1 up to the mean
# install count. The original plot-level aes(x = Type, y = mean_install) was
# fully overridden by the layer mapping, so it is dropped; columns are
# referenced by bare name instead of `Category_install$...`.
ggplot(data = Category_install) +
  geom_segment(
    aes(
      x = Category, xend = Category,
      y = 1, yend = mean_install,
      color = mycolor
    ),
    size = 1,
    alpha = 0.9
  ) +
  theme_light() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none",
    panel.border = element_blank()
  ) +
  xlab("") +
  ylab("Value of Y")
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(cp_google)
}

# Convert Size to a number. A plain as.numeric() turned every value that
# still carried a "k" suffix into NA, which silently removed those apps from
# the per-category averages below.
# NOTE(review): assumes a trailing k/K means kilobytes and that values
# without a suffix (the "M" was stripped earlier) are megabytes; confirm
# against the data source.
size_text <- as.character(cp_google$Size)
size_is_kb <- grepl("[kK]$", size_text)
size_value <- as.numeric(sub("[kKM]$", "", size_text))
size_value[size_is_kb] <- size_value[size_is_kb] / 1024
cp_google$Size <- size_value
stopifnot(is.numeric(cp_google$Size))

# Mean install count and mean size per category, largest mean size first.
Category_install_size <- cp_google %>%
  filter(!is.na(Size) & !is.na(Category) & !is.na(Installs)) %>%
  group_by(Category) %>%
  summarise(mean_install = mean(Installs), mean_size = mean(Size)) %>%
  arrange(desc(mean_size))

# Horizontal bars of mean size, with categories ordered by mean installs.
result3 <- ggplot(
  data = Category_install_size,
  aes(x = reorder(Category, mean_install), y = mean_size)
) +
  geom_col() +
  coord_flip()
result3 + labs(x = "Genre", y = "Size And Install", title = "Size by genre")
# Open the raw data and each summary table in the data viewer. The original
# first called lowercase view() twice; no attached package in this script is
# known to provide it, so only utils::View() is used. The viewer is only
# opened in an interactive session.
if (interactive()) {
  View(google)
  View(category)
  View(Category_install)
  View(Category_review)
  View(Category_install_size)
}
