• r

Extracting and visualizing data from Google Analytics

More on the process of grabbing data from the core reporting API in google analytics, then some visualization using ggplot2.

Here, I want to compare yearly data, both overall, and broken out by source in another graph. For some reason, my library(rga) broke, so I will resort to the library(RGA) method.

library(RGA)
authorize(client.id = "xxxxxxx", client.secret = "xxxxxxx")

startdate <- "2015-10-01"
enddate <- "2015-11-27"
id <- "ga:xxxxxxx"


df2015 <- get_ga(
	id,
	start.date = startdate,
	end.date = enddate,
	metrics="ga:sessions",
	dimensions="ga:medium,ga:year,ga:nthDay",
	segment= ,
	sort = "ga:nthDay",
	filter = ,
	start = 1,
	max = 10000,
)

startdate <- "2014-10-01"
enddate <- "2014-12-31"
id <- "ga:66051804"

df2014 <- get_ga(
	id,
	start.date = startdate,
	end.date = enddate,
	metrics="ga:sessions",
	dimensions="ga:medium,ga:year,ga:nthDay",
	segment= ,
	sort = "ga:nthDay",
	filter = ,
	start = 1,
	max = 10000,
)

As you can see, if I plot a year by year, I will have missing dates, so I will need to fill in empty nth day values for df2015

alldays<-data.frame(seq(min(0),max(91)))
#merge together by nth.day, keeping all values of alldays
df2015<-merge(df2015, alldays, by.x='nth.day', by.y='seq.min.0...max.91..', all=TRUE)
df2015$medium[is.na(df2015$medium)] <- 'extend'
df2015$year[is.na(df2015$year)] <- 2015
df015$sessions[is.na(df2015$sessions)] <- 0

Join 2014 and 2015 data together by rows, meaning column names must match up.

bind<-rbind(df2014,df2015)
bind$year<-as.character(bind$year)
bind$nthday<-bind$nth.day

For overall sessions:

data<-sqldf("select year,nthday,medium,sum(sessions) as sessions
	from bind
	group by year,nthday,medium")
library(ggplot2)
library(ggthemes)
# plot sessions faceted by year, with vertical reference lines for Thanksgiving and Christmas
f<-ggplot(data=data,aes(x=nthday,y=sessions,group=year, shape=year, color=year))
	+ geom_line()
	+ ggtitle("Oct, Nov, Dec - 2014 vs 2015")
	+ scale_colour_tableau()
	+ geom_vline(xintercept=c(57, 85), colour="black", linetype = "longdash")
	+ geom_text(aes(x=57, label="Thanksgiving 2014", y=16000), colour="black", angle=90, vjust = 1.2, text=element_text(size=5))
	+ geom_text(aes(x=85, label="Christmas 2014", y=16000), colour="black", angle=90, vjust = 1.2, text=element_text(size=5))

For session breakout by medium:

data<-sqldf("select year,nthday,medium,sum(sessions) as sessions
	from bind
	where medium in ('(none)','DIS','SCL','VID','cpc','organic','referral')
	group by year,nthday,medium")

# Rename medium names
data$medium[grepl("(none)",data$medium,ignore.case=FALSE)]<-"direct"
data$medium[grepl("DIS",data$medium,ignore.case=FALSE)]<-"display"
data$medium[grepl("SCL",data$medium,ignore.case=FALSE)]<-"social"
data$medium[grepl("VID",data$medium,ignore.case=FALSE)]<-"preroll video"
data$medium[grepl("cpc",data$medium,ignore.case=FALSE)]<-"paid search"
data$medium[grepl("organic",data$medium,ignore.case=FALSE)]<-"organic search"

# Plot by medium, facet by year
f<-ggplot(data=data,aes(x=nthday,y=sessions, group=medium, shape=medium, color=medium, fill=medium))
	+ geom_line()
	+ facet_grid(year~.,scales="fixed")
	+ ggtitle("Oct, Nov, Dec - 2014 vs 2015")
	+ scale_colour_pander()
	+ geom_vline(xintercept=c(57, 85), colour="black", linetype = "longdash")