Apply a function that iterates over a group in data.table (R) -


I have a sample data.table, shown below:

# Sample input: `a`/`b` are the grouping keys, `c` is the text to compare.
dt <- data.table(
  a = rep(c("a", "b"), times = c(4L, 2L)),
  b = rep(c("d", "e"), times = c(4L, 2L)),
  c = c(
    "my name abc",
    "i going   school",
    "name bond",
    "my school xyz",
    "my name abc set 2",
    "my name   abc set 1"
  )
)

Now I need to find the cosine similarity between every row and every other row of column "c", within each group (grouped by column "a" and column "b"), and place the text with the maximum cosine value in a new column "d", as shown below.

# Expected result: the same rows plus `d`, the best-matching text for each `c`.
dt2 <- data.table(
  a = rep(c("a", "b"), times = c(4L, 2L)),
  b = rep(c("d", "e"), times = c(4L, 2L)),
  c = c(
    "my name abc",
    "i going school",
    "name bond",
    "my school xyz",
    "my name abc  set 2",
    "my  name abc set 1"
  ),
  d = c(
    "name bond",
    "i going school",
    "my name abc",
    "my school xyz",
    "my name abc set 1",
    "my name abc set 2"
  )
)

Below is the cosine function, which returns the similarity value between 2 character vectors. I have commented out the code, since it creates temporary files.

# Disabled helper, kept commented out because it creates temporary files.
# cosine(x, y): splits x and y on single spaces, keeps only the tokens that
# contain an alphanumeric character, writes each token list to its own file
# ("d1", "d2") inside a fresh temporary directory, builds a text matrix from
# that directory, deletes the directory, and returns lsa::cosine() of the
# matrix's first two columns.
# NOTE(review): `TRUE` and `minWordLength` are written here in their
# case-sensitive spellings; the collapsed one-line paste had them lower-cased
# (`true`, `minwordlength`) -- confirm against the lsa documentation.
#library(lsa)
#cosine = function(x,y){
#td = tempfile()
#dir.create(td)
#f1 <- unlist(strsplit( as.character(x), split = " "))
#f1 = f1[grepl("[[:alnum:]]",f1 )]
#f2 <- unlist(strsplit( as.character(y), split = " "))
#f2 = f2[grepl("[[:alnum:]]",f2 )]
#write( c(f1), file=paste(td, "d1", sep="/"))
#write( c(f2), file=paste(td, "d2", sep="/"))
#mymatrix = textmatrix(td, minWordLength=1)
#unlink(td, recursive=TRUE)
#res <- lsa::cosine(mymatrix[,1], mymatrix[,2])
#return(res)
#}

I think it should be something like this, but I do not have any idea how to implement it:

# Asker's unfinished sketch, not working code: the stated goal is to compare
# the text rows with cosine() within each (columna, columnb) group.
# NOTE(review): data.table's special symbols are case-sensitive (`.SD`,
# `.SDcols`), so `.sd` / `.sdcols` below look lower-cased by mistake -- confirm.
# NOTE(review): `x` and `y` are not defined anywhere in the visible source, and
# `match := ...` is passed as an argument to lapply() rather than being the
# `j` expression itself -- this needs restructuring before it can run.
testm[, lapply(.sd,match:= cosine(x,y)), by= .(columna,columnb), .sdcols      =   c  ("description")] 


Comments

Popular posts from this blog

javascript - Create a stacked percentage column -

Optimising Firebase database by automatically overwriting data -

javascript - Angular UI-Grid customTemplate directive causing rows to load slowly/? -