3000 Hit Analyzer

Download server.R script here.
Download ui.R script here.
Download the Batting.csv data file here and the Master.csv data file here (server.R reads them under exactly these names).

server.R:

library(shiny)
library(XML)
library(chron)
library(plyr)
 
### OVERHEAD
### Scrape 3000 hit club from www.baseball-reference.com
### The career hits page is downloaded and parsed, and the text of the first
### 28 links found inside table cells is taken as the list of club members
### (28 is hard-coded; it matches the number of entries in the ui.R dropdown).
b <- readLines("http://www.baseball-reference.com/leaders/H_career.shtml")
bdoc <- htmlParse(b, asText = TRUE)
### head() instead of [1:28] so a page with fewer than 28 links is not
### indexed past its end
pathResult <- head(getNodeSet(bdoc, path = "//td/a"), 28)
members <- unlist(lapply(pathResult, xmlValue))
### Drop any literal "+" characters from the scraped names
members <- gsub("+", "", members, fixed = TRUE)
 
### Split every member name on whitespace: token 1 is treated as the first
### name and token 2 as the last name when matching Master.csv playerIDs
memberFirst <- lapply(strsplit(members, split = "[[:space:]]"), `[`, 1)
memberLast <- lapply(strsplit(members, split = "[[:space:]]"), `[`, 2)
 
### Load the locally stored Master.csv and Batting.csv data files.
### They are read with read.csv()'s defaults: header row, comma separator,
### double-quote quoting, "." decimal mark, fill = TRUE, no comment character.
setwd("C:/chitchat/data")
master <- read.csv("Master.csv")
batting <- read.csv("Batting.csv")
 
### Extract playerIDs from Master.csv and
### extract hits and other batting data from Batting.csv.
### After the loop, memberId[i], battingMember[[i]] and hitsMember[[i]] hold
### the playerID, the Batting.csv rows and the H column for the i-th member.
memberId <- character(length(memberLast))
battingMember <- vector("list", length(memberLast))
hitsMember <- vector("list", length(memberLast))
for (i in seq_along(memberLast)) {
  masterSub <- subset(master, as.character(master$nameLast) == memberLast[[i]] &
                        as.character(master$nameFirst) == memberFirst[[i]])

  ### A member without a Master.csv row cannot be plotted; stop with the
  ### offending name rather than a "replacement has length zero" error
  if (nrow(masterSub) == 0) {
    stop("No Master.csv entry found for 3000 hit club member: ",
         memberFirst[[i]], " ", memberLast[[i]], call. = FALSE)
  }

  ### When several rows share the same first and last name, keep the first
  if (nrow(masterSub) > 1) {
    masterSub <- masterSub[1, ]
  }

  memberId[i] <- as.character(masterSub$playerID)
  battingMember[[i]] <- batting[as.character(batting$playerID) == memberId[i], ]
  hitsMember[[i]] <- battingMember[[i]]$H
}
 
### Running hit totals, one vector per club member
mHitsCumSum <- lapply(hitsMember, cumsum)

### Plot extents: the largest number of Batting.csv rows any member has and
### the largest cumulative hit total any member reaches
maxYears <- max(vapply(hitsMember, length, integer(1)))
maxHits <- max(unlist(lapply(mHitsCumSum, max)))
 
### Define server logic required to plot various players against 3000 hit club
shinyServer(function(input, output) {

  ### Cumulative hits for the 3000 hit club member chosen in the dropdown.
  ### input$member carries a playerID, which is located in memberId to find
  ### the matching entry of hitsMember.
  currentMemberHits <- reactive({
    cumsum(hitsMember[[match(as.character(input$member), memberId)]])
  })

  ### Cumulative hits for the free-text player (input$player).
  ### The entry is split on whitespace; token 1 is matched against nameFirst
  ### and token 2 against nameLast in Master.csv. Returns an empty vector when
  ### the entry is blank, has fewer than two tokens, or matches nobody, so the
  ### plot can still be drawn without a player line.
  currentPlayerHits <- reactive({

    playerName <- input$player
    if (is.null(playerName) || length(playerName) == 0) {
      return(integer(0))
    }

    ### Trim the ends and split on runs of whitespace so stray spaces do not
    ### produce empty name tokens
    playerName <- gsub("^[[:space:]]+|[[:space:]]+$", "", playerName[1])
    nameParts <- strsplit(playerName, split = "[[:space:]]+")[[1]]
    if (length(nameParts) < 2) {
      return(integer(0))
    }

    masterSub <- subset(master,
                        as.character(master$nameLast) == nameParts[2] &
                          as.character(master$nameFirst) == nameParts[1])
    if (nrow(masterSub) == 0) {
      return(integer(0))
    }

    ### When several rows share the same first and last name, keep the first
    playerId <- as.character(masterSub$playerID[1])
    battingPlayer <- batting[as.character(batting$playerID) == playerId, ]

    ### Calculate cumulative summation of hits for non-member
    cumsum(battingPlayer$H)
  })

  ### Table of the selected member's cumulative hits.
  ### The comparison against currentPlayerHits() is NOT IMPLEMENTED, and the
  ### matching tableOutput("view") is commented out in ui.R.
  output$view <- renderTable({
    data.frame("X" = currentMemberHits())#, "Y" = currentPlayerHits())
  })

  ### Output xy-scatterplot: every club member in grey, the selected member
  ### in magenta, the typed-in player in blue, and a dashed line at 3000 hits
  output$cumsumPlot <- renderPlot({

    ### Draw one cumulative-hit trajectory; empty vectors are skipped, which
    ### avoids the seq(1, 0, 1) error an unmatched player used to trigger
    drawCareer <- function(hits, col) {
      if (length(hits) > 0) {
        lines(seq_along(hits), hits, lwd = 2, col = col)
      }
    }

    ### Empty canvas sized to the longest career and the highest hit total
    plot(seq(1, maxYears, 1), rep(0, maxYears), type = "n",
         lwd = 2, xlim = c(0, maxYears), ylim = c(0, maxHits),
         xlab = "Year", ylab = "Hits")
    abline(h = 3000, lty = 2, lwd = 2, col = "black")

    for (memberHits in mHitsCumSum) {
      drawCareer(memberHits, col = "grey70")
    }
    drawCareer(currentMemberHits(), col = "magenta")
    drawCareer(currentPlayerHits(), col = "blue")
  })
})

ui.R:

library(shiny)
 
# Define UI for the 3000 Hit Club application
shinyUI(pageWithSidebar(

  # Application title
  headerPanel("The 3000 Hit Club"),

  # Sidebar: pick a club member to highlight and type in another player
  # whose hit trajectory is plotted alongside
  sidebarPanel(

    ### Dropdown of 3,000 hit club members; each display name maps to the
    ### playerID that is delivered to the server as input$member
    selectInput(
      inputId = "member",
      label = "Member of 3000 hit Club:",
      choices = list(
        "Pete Rose" = "rosepe01",
        "Ty Cobb" = "cobbty01",
        "Hank Aaron" = "aaronha01",
        "Stan Musial" = "musiast01",
        "Tris Speaker" = "speaktr01",
        "Cap Anson" = "ansonca01",
        "Honus Wagner" = "wagneho01",
        "Carl Yastrzemski" = "yastrca01",
        "Paul Molitor" = "molitpa01",
        "Eddie Collins" = "collied01",
        "Derek Jeter" = "jeterde01",
        "Willie Mays" = "mayswi01",
        "Eddie Murray" = "murraed02",
        "Nap Lajoie" = "lajoina01",
        "Cal Ripken" = "ripkeca01",
        "George Brett" = "brettge01",
        "Paul Waner" = "wanerpa01",
        "Robin Yount" = "yountro01",
        "Tony Gwynn" = "gwynnto01",
        "Dave Winfield" = "winfida01",
        "Craig Biggio" = "biggicr01",
        "Rickey Henderson" = "henderi01",
        "Rod Carew" = "carewro01",
        "Lou Brock" = "brocklo01",
        "Rafael Palmeiro" = "palmera01",
        "Wade Boggs" = "boggswa01",
        "Al Kaline" = "kalinal01",
        "Roberto Clemente" = "clemero01"
      )
    ),

    # Free-text name of a player to plot against the club members
    textInput(inputId = "player", label = "Player Name:", value = ""),

    # Button to update plot output
    submitButton(text = "Update View"),

    # Experimental conditional input: the method dropdown is only shown
    # while the "Smooth" checkbox is ticked
    checkboxInput(inputId = "smooth", label = "Smooth"),
    conditionalPanel(
      condition = "input.smooth == true",
      selectInput(
        inputId = "smoothMethod",
        label = "Method",
        choices = list("lm", "glm", "gam", "loess", "rlm")
      )
    )

  ),

  # Main area: the hit trajectory plot
  mainPanel(
    #tableOutput("view"),

    plotOutput(outputId = "cumsumPlot")
  )
))
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s