install.packages("readxl")
library(readxl)
install.packages("dplyr")
library(dplyr)
install.packages("ggplot2")
library(ggplot2)
install.packages("packcircles")
library(packcircles)
install.packages("viridis")
library(viridis)

# Load the raw Play Store sheet; `google` stays untouched and all cleaning
# happens on the working copy `cp_google`.
google <- read_excel("googleplaystore1.xlsx")
View(google)
cp_google <- google

# Turn the textual placeholders for "no value" into real NAs.
cp_google[["Size"]] <- replace(
  cp_google[["Size"]],
  cp_google[["Size"]] %in% "Varies with device",
  NA
)
cp_google[["Rating"]] <- replace(
  cp_google[["Rating"]],
  cp_google[["Rating"]] %in% "NaN",
  NA
)

# Drop every row that has an NA in any column.
cp_google <- na.omit(cp_google)

# Reviews becomes numeric; Size only loses its "M" characters here and is
# still a character column after this step.
cp_google[["Reviews"]] <- as.numeric(cp_google[["Reviews"]])
cp_google[["Size"]] <- gsub("M", "", cp_google[["Size"]], fixed = TRUE)



# Convert the install bucket labels ("1,000+", "50,000,000+", ...) to numbers.
#
# The previous nested ifelse() chain tested `google$Installs` (the unfiltered
# frame) for the three largest buckets. After na.omit() `cp_google` has fewer
# rows than `google`, so those values were taken from misaligned rows.
# Parsing the label itself keeps every value tied to its own row.
#
# Only labels made of digits and thousands separators with an optional
# trailing "+" are parsed; anything else (including NA) becomes NA, as before.
# NOTE(review): a label such as "0+" was not listed in the old chain and would
# have become NA; it now parses to 0 - confirm this is acceptable.
installs_raw <- as.character(cp_google$Installs)
is_count <- grepl("^[0-9][0-9,]*\\+?$", installs_raw)

installs_num <- rep(NA_real_, length(installs_raw))
installs_num[is_count] <- as.numeric(gsub("[+,]", "", installs_raw[is_count]))

cp_google$Installs <- installs_num


# Mark placeholder version strings as missing, then drop every row that
# still contains an NA in any column.
missing_version <- cp_google[["Current Ver"]] %in% "Varies with device"
cp_google[["Current Ver"]] <- replace(
  cp_google[["Current Ver"]], missing_version, NA
)

nan_version <- cp_google[["Current Ver"]] %in% "NaN"
cp_google[["Current Ver"]] <- replace(
  cp_google[["Current Ver"]], nan_version, NA
)

cp_google <- na.omit(cp_google)

# Build the per-category frequency table (number of apps per Category),
# sorted from the most to the least frequent category.
category <- cp_google %>%
  filter(!is.na(Category)) %>%
  count(Category, sort = TRUE)

# 1. Plot data: one row per category with its app count.
data <- with(category, data.frame(group = Category, value = n))

# 2. Compute the circle layout. With sizetype = "area" each circle's area
#    (rather than its radius) is proportional to `value`.
packing <- circleProgressiveLayout(data$value, sizetype = "area")
data <- cbind(data, packing)

# Polygon vertices (50 points per circle) used to draw each circle.
dat.gg <- circleLayoutVertices(packing, npoints = 50)

# 3. Draw the circles, colour them, and label each with its category name.
bubble_layer <- geom_polygon(
  data = dat.gg,
  mapping = aes(x, y, group = id, fill = as.factor(id)),
  colour = "black",
  alpha = 0.6
)
label_layer <- geom_text(
  data = data,
  mapping = aes(x, y, size = value, label = group)
)

ggplot() +
  bubble_layer +
  scale_fill_manual(values = magma(nrow(data))) +
  label_layer +
  scale_size_continuous(range = c(1, 4)) +
  theme_void() +
  theme(legend.position = "none") +
  coord_equal()

# Mean number of installs per category and app type, highest first.
# `.groups = "drop"` returns an ungrouped table so later verbs are not
# silently applied per Category.
Category_install <- cp_google %>%
  filter(!is.na(Installs), !is.na(Category), !is.na(Type)) %>%
  group_by(Category, Type) %>%
  summarise(mean_install = mean(Installs), .groups = "drop") %>%
  arrange(desc(mean_install))

# Bar chart of mean installs per category. Columns are referenced by bare
# name inside aes() so the mapping always follows the `data` argument.
result <- ggplot(
  data = Category_install,
  aes(x = reorder(Category, -mean_install), y = mean_install)
) +
  geom_col() +
  coord_flip()
result + labs(x = "장르", y = "다운로드", title = "장르별 다운로드 수")

# Mean number of reviews per category and app type, highest first.
Category_review <- cp_google %>%
  filter(!is.na(Category), !is.na(Type), !is.na(Reviews)) %>%
  group_by(Category, Type) %>%
  summarise(mean_review = mean(Reviews), .groups = "drop") %>%
  arrange(desc(mean_review))

# Inspect the table only after it exists; viewing it before its definition
# fails with "object not found" in a fresh session.
View(Category_review)

# Bar chart of mean reviews per category.
result1 <- ggplot(
  data = Category_review,
  aes(x = reorder(Category, -mean_review), y = mean_review)
) +
  geom_col() +
  coord_flip()
result1 + labs(x = "장르", y = "리뷰 수", title = "장르별 리뷰 수")

# Colour key per row of Category_install: "type1" for paid apps, "type2" for
# everything else. The vector is row-aligned with Category_install, so it must
# be recomputed if that table is re-sorted or filtered.
mycolor <- ifelse(Category_install$Type == "Paid", "type1", "type2")

# Segment chart: one vertical segment per (Category, Type) row running from
# y = 1 up to the mean install count, coloured by the key above.
# Columns are referenced by bare name inside aes(); the former plot-level
# mapping was overridden entirely by this layer and has been removed.
ggplot(data = Category_install) +
  geom_segment(
    aes(
      x = Category, xend = Category,
      y = 1, yend = mean_install,
      color = mycolor
    ),
    size = 1, alpha = 0.9
  ) +
  theme_light() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none",
    panel.border = element_blank()
  ) +
  xlab("") +
  ylab("Value of Y")

# Convert Size to a numeric column.
# Plain numbers (and values with an "M" suffix) are taken as-is; values with
# a "k" suffix are divided by 1024 so the whole column shares one unit.
# Previously as.numeric() turned every "k" value into NA, which silently
# removed those apps from the size summary below.
# NOTE(review): assumes "M" = megabytes and "k" = kilobytes (1024 per MB) -
# confirm against the data source.
size_raw <- as.character(cp_google$Size)
is_kb <- grepl("k$", size_raw)
size_num <- as.numeric(sub("[Mk]$", "", size_raw))
size_num[is_kb] <- size_num[is_kb] / 1024
cp_google$Size <- size_num

# Mean installs and mean size per category, largest mean size first.
Category_install_size <- cp_google %>%
  filter(!is.na(Size), !is.na(Category), !is.na(Installs)) %>%
  group_by(Category) %>%
  summarise(mean_install = mean(Installs), mean_size = mean(Size)) %>%
  arrange(desc(mean_size))

# Inspect the table only after it exists; viewing it before its definition
# fails with "object not found" in a fresh session.
View(Category_install_size)

# Bar chart of mean size per category, with categories ordered by their mean
# install count.
result3 <- ggplot(
  data = Category_install_size,
  aes(x = reorder(Category, mean_install), y = mean_size)
) +
  geom_col() +
  coord_flip()
result3 + labs(x = "Genre", y = "Size And Install", title = "Size by genre")

