r - Aesthetics not right, might be related to melt function -
i have following task: -extrect 2 tabs url excel file (contain hourly data) -in 1 tab interested in data point @ 16:00, , hence filter -in second tab interested in data points, aligned, create daily average of 24 hourly values
then, use sort of cbind.fill function because want merge data. necessary, 16:00 @ 16:00 of course , couldnt use cbind before 16:00 have 1 less line have in aggregated tab.
then variable filtering , renaming.
before plotting use melt function in order create sort of subchart. here might lie problem, warning when running code. don't understand why struggle convert levels factors numerical characters.
i can still plot in end x axis looks hideous. not such difficult task getting nowhere. if give me hint, i'd appreciate it.
thanks in advance!
the code:
is.installed <- function(mypkg){ is.element(mypkg, installed.packages()[,1]) } if (!is.installed("ggplot2")){ install.packages("ggplot2") } if (!is.installed("lubridate")){ install.packages("lubridate") } if (!is.installed("openxlsx")){ install.packages("openxlsx") } library(ggplot2) library(lubridate) library(openxlsx) library(reshape2) library(dplyr) storico_g <- read.xlsx(xlsxfile = "http://www.snamretegas.it/repository/file/info-storiche-qta-gas-trasportato/dati_operativi/2017/datioperativi_2017-it.xlsx",sheet = "storico_g", startrow = 1, colnames = true) storico_g1 <- read.xlsx(xlsxfile = "http://www.snamretegas.it/repository/file/info-storiche-qta-gas-trasportato/dati_operativi/2017/datioperativi_2017-it.xlsx",sheet = "storico_g+1", startrow = 1, colnames = true) # selecting column c,e,r storico_g , stored in variable storico_g_df # selecting column a,p storico_g+1 , stored in variable storico_g1_df storico_g_df <- data.frame(storico_g$pubblicazione,storico_g$immesso, storico_g$`riconsegnato.(1)`, storico_g$bilanciamento.residuale ) storico_g1_df <- data.frame(storico_g1$pubblicazione, storico_g1$`sbilanciamento.atteso.del.sistema.(sas)`) # conerting pubblicazione in date format , time storico_g_df$pubblicazione <- ymd_h(storico_g_df$storico_g.pubblicazione) storico_g1_df$pubblicazione <- ymd_h(storico_g1_df$storico_g1.pubblicazione) # selecting on row having 4pm value in storico_g+1 excel sheet tab storico_g1_df <- subset(storico_g1_df, hour(storico_g1_df$pubblicazione) == 16) rownames(storico_g1_df) <- 1:nrow(storico_g1_df) # averaging hourly values 1 daily data point in g excel sheet tab storico_g_df$storico_g.pubblicazione <- strptime(storico_g_df$storico_g.pubblicazione, "%y_%m_%d_%h") storico_g_df_agg <- aggregate(storico_g_df, by=list(day=format(storico_g_df$storico_g.pubblicazione, "%f")), fun=mean, na.rm=true)[,-2] #cbind.fill function cbind.fill <- function(...){ nm <- list(...) nm <- lapply(nm, as.matrix) n <- max(sapply(nm, nrow)) do.call(cbind, lapply(nm, function (x) rbind(x, matrix(, n-nrow(x), ncol(x))))) } #cbind both frames g_g1_df= data.frame(cbind.fill(storico_g_df_agg,storico_g1_df)) #keep required columns keep=c("storico_g.immesso","storico_g..riconsegnato..1..","storico_g1..sbilanciamento.atteso.del.sistema..sas..") #update dataframe kept variables g_g1_df=g_g1_df[,keep,drop=false] #add counting variable g_g1_df$x=seq(nrow(g_g1_df)) #rename crazy variable names g_g1_df <- data.frame(g_g1_df) %>% select(storico_g.immesso, storico_g..riconsegnato..1.., storico_g1..sbilanciamento.atteso.del.sistema..sas.., x) names(g_g1_df) <- c("immesso","riconsegnato", "sas","x") #melt time series g_g1_df=melt(g_g1_df,id.vars = "x") #create group variable g_g1_df$group<- ifelse(g_g1_df$variable == "sas", "sas", "immesso/consegnato") #plot ggplot(g_g1_df, aes(x,value, col=variable))+geom_point()+geom_line()+facet_wrap(~group,ncol=1,scales="free_y")+geom_abline(intercept=c(-2,0,2),slope=0,data=subset(g_g1_df,group=="sas"),lwd=0.5,lty=2)
leaving aside data wrangling part (that can surely vastly improved) plot problem lies in fact value
column of character
class. (this cames data wrangling part)
as simple solution can cast numeric in ggplot
call:
library(ggplot2) ggplot(g_g1_df, aes(x,as.numeric(value), col=variable))+ geom_point()+ geom_line()+ facet_wrap(~group,ncol=1,scales="free_y")+ geom_abline(intercept=c(-2,0,2),slope=0,data=subset(g_g1_df,group=="sas"),lwd=0.5,lty=2) #> warning: removed 1 rows containing missing values (geom_point). #> warning: removed 1 rows containing missing values (geom_path).
Comments
Post a Comment