# This tests the searchMetadata function.
# library(testthat); library(gypsum); source("test-searchMetadata.R")

library(DBI)
library(RSQLite)

# Path to the mock SQLite file that every test below queries.
tmp <- tempfile(fileext=".sqlite3")

# Build the mock database inside an immediately-invoked function so that the
# connection and all intermediate objects stay out of the test environment.
# Five tables are written: 'versions', 'paths', 'tokens', 'fields' and 'links'.
(function() {
    conn <- dbConnect(SQLite(), tmp)
    on.exit(dbDisconnect(conn), add = TRUE)

    # Three versions of a single project/asset; only the third is flagged as latest.
    dbWriteTable(conn, "versions", data.frame(
        vid = 1:3,
        project = "foo",
        asset = "bar",
        version = as.character(1:3),
        latest = c(FALSE, FALSE, TRUE),
        user = c("foo", "bar", "stuff"),
        time = 1:3
    ))

    # One metadata record per path. Records deliberately differ in which fields
    # are present (e.g., 'comment', 'affiliation', 'ability', 'last_name').
    metadata <- list(
        list(first_name="mikoto", last_name="misaka", school="tokiwadai", ability="railgun", gender="female", comment="rank 3"),
        list(first_name="mitsuko", last_name="kongou", school="tokiwadai", ability="aerohand", gender="female"),
        list(first_name="kuroko", last_name="shirai", school="tokiwadai", ability="teleport", gender="female", affiliation="judgement"),
        list(first_name="misaki", last_name="shokuhou", school="tokiwadai", ability="mental out", gender="female", comment="rank 5"),
        list(first_name="ruiko", last_name="saten", school="sakugawa", gender="female"),
        list(first_name="kazari", last_name="uiharu", school="sakugawa", gender="female", affiliation="judgement"),
        list(first_name="accelerator", ability="vector manipulation", gender="male", comment="rank 1")
    )

    # Paths are named after 'first_name' and assigned to versions by recycling
    # 1:3, i.e., paths 1/4/7 -> version 1, 2/5 -> version 2, 3/6 -> version 3.
    dbWriteTable(conn, "paths", data.frame(
        pid = seq_along(metadata),
        vid = rep(1:3, length.out=length(metadata)),
        path = paste0(vapply(metadata, function(x) x$first_name, ""), ".txt"),
        metadata = vapply(metadata, jsonlite::toJSON, auto_unbox=TRUE, "")
    ))

    # Split every value on spaces first and deduplicate afterwards. Deduplicating
    # before the split leaves repeated tokens in the table, as multi-word values
    # like "rank 3" and "rank 5" both contribute "rank".
    all.tokens <- unique(unlist(strsplit(unlist(metadata, use.names=FALSE), " ")))
    dbWriteTable(conn, "tokens", data.frame(tid = seq_along(all.tokens), token = all.tokens))

    all.fields <- unique(unlist(lapply(metadata, names)))
    dbWriteTable(conn, "fields", data.frame(fid = seq_along(all.fields), field = all.fields))

    # One row per (path, field, token) combination. Tokens are deduplicated
    # within each field value before being mapped to their table indices.
    links <- lapply(seq_along(metadata), function(i) {
        my.tokens <- lapply(metadata[[i]], function(x) unique(strsplit(x, " ")[[1]]))
        my.fields <- rep(names(my.tokens), lengths(my.tokens))
        my.tokens <- unlist(my.tokens, use.names=FALSE)
        data.frame(
            pid = rep(i, length(my.tokens)),
            fid = match(my.fields, all.fields),
            tid = match(my.tokens, all.tokens)
        )
    })
    dbWriteTable(conn, "links", do.call(rbind, links))
})()

# Unless stated otherwise, the tests pass include.metadata=FALSE and latest=FALSE,
# and compare the returned 'path' column against the expected paths.

test_that("searchMetadata works for text searches", {
    out <- searchMetadata(tmp, gsc("mikoto"), include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, "mikoto.txt")

    # Tokenization works correctly.
    out <- searchMetadata(tmp, gsc(" kuroko "), include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("kuroko.txt"))
    out <- searchMetadata(tmp, gsc("TOKIWADAI"), include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("mikoto.txt", "mitsuko.txt", "kuroko.txt", "misaki.txt"))

    # Partial matching works correctly.
    query <- gsc("Mi%", partial=TRUE)
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("mikoto.txt", "mitsuko.txt", "misaki.txt"))

    # Field-specific matching works correctly.
    query <- gsc("sa%", partial=TRUE, field="last_name")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("ruiko.txt"))
})

test_that("searchMetadata works for AND searches", {
    # AND automatically happens upon tokenization.
    out <- searchMetadata(tmp, gsc("sakugawa judgement"), include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, "kazari.txt")

    # We can also be more explicit.
    query <- gsc("rank") & gsc("male")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, "accelerator.txt")

    # Nested ANDs are handled properly.
    query <- (gsc("s%", partial=TRUE) & gsc("tokiwadai")) & gsc("judgement")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, "kuroko.txt")
})

test_that("searchMetadata works for OR searches", {
    query <- gsc("uiharu") | gsc("rank")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("mikoto.txt", "misaki.txt", "kazari.txt", "accelerator.txt"))

    # ORs work correctly with partial matches.
    query <- gsc("judgement") | gsc("Mi%", partial=TRUE)
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("mikoto.txt", "mitsuko.txt", "kuroko.txt", "misaki.txt", "kazari.txt"))

    # ORs work correctly with field matches.
    query <- gsc("mi%", partial=TRUE, field="last_name") | gsc("judgement")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("mikoto.txt", "kuroko.txt", "kazari.txt"))

    # Nested ORs are collapsed properly.
    query <- (gsc("teleport") | gsc("aerohand")) | gsc("mental")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("mitsuko.txt", "kuroko.txt", "misaki.txt"))

    # Partial field-specific match combined with an exact match.
    query <- (gsc("%sa%", field="school", partial=TRUE) | gsc("mental"))
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("misaki.txt", "ruiko.txt", "kazari.txt"))
})

test_that("searchMetadata works with combined AND and OR searches", {
    # OR that contains an AND.
    query <- (gsc("judgement") & gsc("sakugawa")) | gsc("aerohand") | gsc("vector")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("mitsuko.txt", "kazari.txt", "accelerator.txt"))

    # OR that contains multiple ANDs.
    query <- (gsc("judgement") & gsc("sakugawa")) | (gsc("female") & gsc("rank"))
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("mikoto.txt", "misaki.txt", "kazari.txt"))

    # AND that contains an OR.
    query <- gsc("rank") & (gsc("shokuhou") | gsc("kongou"))
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("misaki.txt"))

    # AND that contains multiple ORs.
    query <- (gsc("rank") | gsc("judgement")) & (gsc("male") | gsc("teleport"))
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("kuroko.txt", "accelerator.txt"))
})

test_that("searchMetadata works for NOT searches", {
    # Negating an exact match.
    query <- !gsc("uiharu")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("mikoto.txt", "mitsuko.txt", "kuroko.txt", "misaki.txt", "ruiko.txt", "accelerator.txt"))

    # Negating a partial match.
    query <- !gsc("mi%", partial=TRUE)
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("kuroko.txt", "ruiko.txt", "kazari.txt", "accelerator.txt"))

    # Negating an OR.
    query <- !(gsc("uiharu") | gsc("rank"))
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("mitsuko.txt", "kuroko.txt", "ruiko.txt"))

    # A NOT nested inside an AND.
    query <- gsc("rank") & !gsc("tokiwadai")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("accelerator.txt"))
})

test_that("searchMetadata works for non-text-based searches", {
    # Every path belongs to the same project and asset. Results are sorted
    # before comparison, so the order of the returned rows is not checked here.
    query <- gsc(project="foo", asset="bar")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(sort(out$path), sort(c("mikoto.txt", "mitsuko.txt", "kuroko.txt", "misaki.txt", "ruiko.txt", "kazari.txt", "accelerator.txt")))

    query <- gsc(version="2")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("mitsuko.txt", "ruiko.txt"))

    query <- gsc(path="%ko_txt", partial=TRUE)
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("mitsuko.txt", "kuroko.txt", "ruiko.txt"))

    query <- gsc(user="foo")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(sort(out$path), sort(c("mikoto.txt", "misaki.txt", "accelerator.txt")))

    # Combining the two time filters is expected to leave only the paths in the
    # version with time = 3.
    query <- gsc(time=2, after=TRUE) & gsc(time=3, after=FALSE)
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(sort(out$path), sort(c("kuroko.txt", "kazari.txt")))

    # Combines with the other searches.
    query <- gsc(path="kuroko.txt") | gsc("railgun")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(out$path, c("mikoto.txt", "kuroko.txt"))
})

test_that("searchMetadata works with ill-defined filters", {
    # We return everything.
    out <- searchMetadata(tmp, gsc(" "), include.metadata=FALSE, latest=FALSE)
    expect_identical(nrow(out), 7L)

    # Ill-defined filters are ignored in boolean operations.
    query <- gsc("female") & gsc(" ")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(nrow(out), 6L)

    query <- gsc("male") | gsc(" ")
    out <- searchMetadata(tmp, query, include.metadata=FALSE, latest=FALSE)
    expect_identical(nrow(out), 1L)
})

test_that("searchMetadata respects the other output options", {
    # 'latest' and 'include.metadata' are left at their defaults here. The
    # expected paths are the two "female" records in version 3 (the only version
    # flagged as latest in the fixture), and each row of 'out$metadata' is
    # expected to carry the 'first_name' that its path was named after.
    out <- searchMetadata(tmp, gsc("female"))
    expect_identical(out$path, c("kuroko.txt", "kazari.txt"))
    expect_identical(out$path, paste0(vapply(out$metadata, function(x) x$first_name, ""), ".txt"))
})