Apply a function that iterates over a group in data.table (R)
I have the sample data.table below:
dt = data.table(a= c("a","a","a","a","b","b"), b= c ("d","d","d","d","e","e"), c= c("my name abc","i going school","name bond","my school xyz","my name abc set 2","my name abc set 1") )
Now I need to find the cosine similarity between every row and every other row of column "c", within each group (grouped by column "a" and column "b"), and place the text with the maximum cosine value in a new column "d", as shown below:
dt2 = data.table(a= c("a","a","a","a","b","b"), b= c("d","d","d","d","e","e"), c= c("my name abc", "i going school","name bond","my school xyz", "my name abc set 2","my name abc set 1"), d= c("name bond" ,"i going school","my name abc", "my school xyz","my name abc set 1","my name abc set 2"))
Below is the cosine function, which returns the similarity value between two character vectors. I have commented out the code, since it creates temporary files:
#library(lsa)
#cosine = function(x,y){
#td = tempfile()
#dir.create(td)
#f1 <- unlist(strsplit( as.character(x), split = " "))
#f1 = f1[grepl("[[:alnum:]]",f1 )]
#f2 <- unlist(strsplit( as.character(y), split = " "))
#f2 = f2[grepl("[[:alnum:]]",f2 )]
#write( c(f1), file=paste(td, "d1", sep="/"))
#write( c(f2), file=paste(td, "d2", sep="/"))
#mymatrix = textmatrix(td, minwordlength=1)
#unlink(td, recursive=true)
#res <- lsa::cosine(mymatrix[,1], mymatrix[,2])
#return(res)
#}
I think it should be something like this, but I have no idea how to implement it:
testm[, lapply(.sd,match:= cosine(x,y)), by= .(columna,columnb), .sdcols = c ("description")]
Comments
Post a Comment