R function that works on previous observation -


I have a collection of users' IP addresses and the associated times at which they hit a particular website. I am trying to find the change in time between each IP address change. To make this easier, I've assigned each row a label indicating whether or not it represents a change from the previous row, and I've done this on a per-user basis.

sample data:

user.nm  ip.addr.txt  login.sessn.ts    change.label
b        c            2/18/2013 16:08   FALSE
b        c            2/18/2013 16:08   FALSE
b        c            2/28/2013 13:37   FALSE
b        c            2/28/2013 16:10   FALSE
b        c            2/28/2013 16:20   FALSE
b        c            3/5/2013 9:29     FALSE
b        c            3/6/2013 11:42    FALSE
b        c            3/11/2013 13:55   FALSE   <-
b        b            6/25/2013 13:22   TRUE    <-
b        d            6/25/2013 13:22   FALSE   <-
b        b            8/12/2013 13:18   TRUE    <-
b        c            8/12/2013 13:18   FALSE
b        c            8/20/2013 15:13   FALSE
b        c            8/20/2013 15:13   FALSE
b        c            9/23/2013 14:08   FALSE
b        c            9/23/2013 14:09   FALSE
b        c            9/25/2013 11:00   FALSE
b        c            10/18/2013 16:54  FALSE
b        c            10/18/2013 16:54  FALSE
b        c            10/30/2013 14:33  FALSE
b        c            11/8/2013 15:03   FALSE
b        c            11/18/2013 11:30  FALSE
b        c            11/18/2013 11:33  FALSE
b        c            11/20/2013 16:08  FALSE
b        c            11/21/2013 11:51  FALSE
b        c            11/21/2013 11:52  FALSE
b        c            11/21/2013 15:18  FALSE
b        c            11/21/2013 16:40  FALSE
b        c            11/21/2013 16:44  FALSE
b        c            11/21/2013 16:45  FALSE
b        c            11/21/2013 16:45  FALSE
b        c            11/29/2013 15:41  FALSE
b        c            11/29/2013 15:41  FALSE
a        a            1/9/2013 15:32    FALSE
a        a            1/9/2013 15:32    FALSE
a        a            1/9/2013 15:32    FALSE
a        a            1/9/2013 15:32    FALSE
a        a            1/10/2013 10:39   FALSE
a        a            1/10/2013 10:39   FALSE
a        a            1/10/2013 10:39   FALSE
a        a            1/11/2013 10:31   FALSE
a        a            1/11/2013 10:31   FALSE
a        a            1/18/2013 12:30   FALSE
a        a            2/22/2013 10:54   FALSE   <-
a        b            3/6/2013 12:27    TRUE    <-

dput:

# Reproducible sample (dput output): 45 login records for two users
# ("b": rows 1-33, "a": rows 34-45). `change.label` is TRUE on rows where the
# IP address differs from the same user's previous row.
# Literals must keep R's exact case (TRUE/FALSE, NA, the L integer suffix,
# .Names, and the POSIXct/POSIXt class names) or the snippet will not evaluate.
sample.data <- structure(
  list(
    user.nm = c(
      "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",
      "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",
      "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",
      "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a"
    ),
    ip.addr.txt = c(
      "c", "c", "c", "c", "c", "c", "c", "c", "b", "c", "b",
      "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c",
      "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c",
      "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "b"
    ),
    login.sessn.ts = structure(
      c(
        1361221680, 1361221680, 1362076620, 1362085800, 1362086400,
        1362493740, 1362588120, 1363024500, 1372180920, 1372180920,
        1376327880, 1376327880, 1377025980, 1377025980, 1379959680,
        1379959740, 1380121200, 1382129640, 1382129640, 1383157980,
        1383940980, 1384792200, 1384792380, 1384981680, 1385052660,
        1385052720, 1385065080, 1385070000, 1385070240, 1385070300,
        1385070300, 1385757660, 1385757660, 1357763520, 1357763520,
        1357763520, 1357763520, 1357832340, 1357832340, 1357832340,
        1357918260, 1357918260, 1358530200, 1361548440, 1362590820
      ),
      class = c("POSIXct", "POSIXt"),
      tzone = ""
    ),
    change.label = c(
      FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
      FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
      FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
      FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
      FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE
    )
  ),
  .Names = c("user.nm", "ip.addr.txt", "login.sessn.ts", "change.label"),
  row.names = c(NA, -45L),
  class = "data.frame"
)

I am attempting to write a ddply summarize statement that will give me the difference in time between each IP change per user (among other things). Normally, I would subset the data frame to the observations that have the label TRUE and then use ddply on that data frame. However, I need the difference between pairs of rows: a FALSE row followed by a TRUE row.

Ideally, the output would be a data frame like this:

user.nm  change.count  min.change.time  max.change.time
a        2             10 sec           4 hours
b        1             1 hour           1 hour

I was hoping to use some kind of index lookup function like match, but I'm not sure how to translate that into a function.

Is there some kind of "look-behind" function in R for this?

My code for getting the number of IP changes works so far, and is below:

# Flag the positions where consecutive elements of a vector differ.
#
# vec: a vector (here, one user's IP addresses in row order).
# Returns a logical vector of length(vec) - 1; element i is TRUE when
# vec[i + 1] is not equal to vec[i] (i.e. the value was not directly repeated).
did.change <- function(vec) {
  head(vec, -1) != tail(vec, -1)
}

# Absolute difference between every element of a vector and its successor.
#
# vec: a numeric or date-time vector (here, one user's login timestamps).
# Returns a vector of length(vec) - 1.
# Note: this covers every consecutive pair of entries for the user, not only
# the pairs where the IP address changed, so it is too broad for the question.
time.changes <- function(vec) {
  # `head(vec, -1)` drops the last element so both operands line up pairwise.
  # (`head(vec - 1)` would instead take the first six elements of `vec - 1`.)
  abs(head(vec, -1) - tail(vec, -1))
}

# Per-user count of IP changes. Needs plyr attached for ddply()/summarize.
user.changes <- ddply(
  sample.data, c("user.nm"), summarize,
  change.count = sum(did.change(ip.addr.txt))
  # max.change.time = max(time.changes(login.sessn.ts)),
  # min.change.time = min(time.changes(login.sessn.ts))
)

Short answer: yes, and it is called diff!

long answer:

# Indices where change.label steps from FALSE to TRUE between consecutive
# rows; each index points at the FALSE row immediately before a change.
is_diff <- which(diff(sample.data$change.label) == 1)

# Flatten every (row before the change, row of the change) pair into a single
# vector of row indices.
ss <- unlist(lapply(is_diff, function(i) c(i, i + 1)))

sample.data[ss, ]
#    user.nm ip.addr.txt      login.sessn.ts change.label
# 8        b           c 2013-03-11 10:55:00        FALSE
# 9        b           b 2013-06-25 10:22:00         TRUE
# 10       b           c 2013-06-25 10:22:00        FALSE
# 11       b           b 2013-08-12 10:18:00         TRUE
# 44       a           a 2013-02-22 07:54:00        FALSE
# 45       a           b 2013-03-06 09:27:00         TRUE

Here is one way to calculate the changes in login times:

library(lubridate)

# One index pair per IP change: (row before the change, row of the change).
ss_list <- lapply(is_diff, function(i) c(i, i + 1))

# Login timestamps for each pair of rows.
logins <- lapply(ss_list, function(idx) sample.data[idx, "login.sessn.ts"])

# Elapsed time between the two logins of each pair.
lapply(logins, function(ts) {
  # Only parse when the column holds text. A column that is already POSIXct
  # can be differenced directly; pushing it through ymd_hms() would re-parse
  # its printed form as UTC and lose the original time zone.
  if (!inherits(ts, "POSIXct")) {
    ts <- ymd_hms(ts)
  }
  diff(ts)
})

If you want to break this down by user.nm, try using dplyr:

library(dplyr)

# Keep only the rows that bracket an IP change (indices in `ss`), then compute,
# within each user, the time elapsed since the previous kept row.
sample.data %>%
  # seq_len() is safe for zero-row input, unlike 1:nrow().
  mutate(rownum = seq_len(nrow(sample.data))) %>%
  filter(rownum %in% ss) %>%
  group_by(user.nm) %>%
  mutate(
    # Fix the units explicitly. Plain POSIXct subtraction picks units per
    # group, so seconds and days end up mixed under a single "days" label.
    change = difftime(login.sessn.ts, lag(login.sessn.ts), units = "secs")
  ) %>%
  ungroup()
# Expected result (timestamps print in the session's time zone):
#   user.nm ip.addr.txt      login.sessn.ts change.label rownum       change
# 1       b           c 2013-03-11 10:55:00        FALSE      8      NA secs
# 2       b           b 2013-06-25 10:22:00         TRUE      9 9156420 secs
# 3       b           c 2013-06-25 10:22:00        FALSE     10       0 secs
# 4       b           b 2013-08-12 10:18:00         TRUE     11 4146960 secs
# 5       a           a 2013-02-22 07:54:00        FALSE     44      NA secs
# 6       a           b 2013-03-06 09:27:00         TRUE     45 1042380 secs

Comments

Popular posts from this blog

google api - Incomplete response from Gmail API threads.list -

Installing Android SQLite Asset Helper -

Qt Creator - Searching files with Locator including folder -