下面的代码处理您问题的第二部分,即如何匹配名称。(顺便说一句,提问时附上示例数据总是有帮助的。)
# dplyr provides full_join(). library() is used so that a missing package
# stops the script here instead of failing later at the join.
library(dplyr)
# Build two example data frames. The names are kept as plain character
# vectors (not factors) so the string helpers and the join key behave the
# same on R versions before and after 4.0.
d1 <- data.frame(
  player_first_last = c("Joe Smith", "Jack Johnson", "A Jones", "Slow Mo"),
  d1_value = c(1, 2, 3, 4),
  stringsAsFactors = FALSE
)
d2 <- data.frame(
  player_last_first2 = c("SmithJo", "JohnsJa", "MoSl"),
  d2_value = c(11, 12, 13),
  stringsAsFactors = FALSE
)

# First two characters of the full name, i.e. the start of the first name.
# trimws() drops the trailing space picked up by one-letter first names
# ("A Jones" -> "A " -> "A"). The column name is spelled out in full rather
# than relying on `$` partial matching, which returns NULL as soon as a
# second column starting with "player_first" exists.
d1$player_first2 <- trimws(substr(d1$player_first_last, 1, 2))

# Last name via a regular expression: the leading `.+` is greedy, so the
# capture group holds everything after the LAST whitespace. Names without
# any whitespace do not match and are left unchanged.
d1$player_last <- sub(".+\\s(.+)", "\\1", d1$player_first_last)

# Join key: first five characters of the last name followed by the
# (up to) two-character first-name prefix.
d1$player_last_first2 <- paste0(
  substr(d1$player_last, 1, 5),
  d1$player_first2
)
# Combine the two tables on the constructed key. A full join is used so
# that rows whose key appears in only one table are still kept.
d3 <- dplyr::full_join(
  d1,
  d2,
  by = "player_last_first2"
)
连接后的结果保存在 d3 中:
  player_first_last d1_value player_first2 player_last player_last_first2 d2_value
1         Joe Smith        1            Jo       Smith            SmithJo       11
2      Jack Johnson        2            Ja     Johnson            JohnsJa       12
3           A Jones        3             A       Jones             JonesA       NA
4           Slow Mo        4            Sl          Mo               MoSl       13