Skip to content

davharris/jqr

 
 

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

43 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

jqr

Build Status Coverage Status

R interface to jq http://stedolan.github.io/jq/

Install

Get dependencies not on CRAN

devtools::install_github("vsbuffalo/rivr")

To install jqr, run the following from the command line the first time after cloning the repo:

./bootstrap.R

which will download the 1.4 release of jq. It only needs to be run once, and will stop being required once this issue is resolved.

library("jqr")

Interfaces

low level

There's a low level interface in which you can execute jq code just as you would on the command line:

str <- '[{
    "foo": 1,
    "bar": 2
  },
  {
    "foo": 3,
    "bar": 4
  },
  {
    "foo": 5,
    "bar": 6
}]'
jq_(str, ".[]")
#> [1] "{\"foo\":1,\"bar\":2}" "{\"foo\":3,\"bar\":4}" "{\"foo\":5,\"bar\":6}"
jq_(str, "[.[] | {name: .foo} | keys]")
#> [1] "[[\"name\"],[\"name\"],[\"name\"]]"

high level

The other is higher level, and uses a suite of functions to construct queries. Queries are constructed, then executed with the function jq().

Examples:

Index

x <- '[{"message": "hello", "name": "jenn"}, {"message": "world", "name": "beth"}]'
x %>% index() %>% jq
#> {"message":"hello","name":"jenn"} {"message":"world","name":"beth"}

Sort

'[8,3,null,6]' %>% sort %>% jq
#> [null,3,6,8]

reverse order

'[1,2,3,4]' %>%  reverse %>% jq
#> [4,3,2,1]

Use peek() to show the query that would be run, without executing it

'[1,2,3,4]' %>%  reverse %>% peek
#> <jq query>
#>   query:  reverse

string operations

join

'["a","b,c,d","e"]' %>% join %>% jq
#> "a, b,c,d, e"
'["a","b,c,d","e"]' %>% join(`;`) %>% jq
#> "a; b,c,d; e"

ltrimstr

'["fo", "foo", "barfoo", "foobar", "afoo"]' %>% index() %>% ltrimstr(foo) %>% jq
#> "fo" "" "barfoo" "bar" "afoo"

rtrimstr

'["fo", "foo", "barfoo", "foobar", "foob"]' %>% index() %>% rtrimstr(foo) %>% jq
#> "fo" "" "bar" "foobar" "foob"

startswith

'["fo", "foo", "barfoo", "foobar", "barfoob"]' %>% index %>% startswith(foo) %>% jq
#> false true false true false

endswith

'["fo", "foo", "barfoo", "foobar", "barfoob"]' %>% index %>% endswith(foo) %>% jq
#> false true true false false

tojson, fromjson, tostring

'[1, "foo", ["foo"]]' %>% index %>% tostring %>% jq
#> "1" "foo" "[\"foo\"]"
'[1, "foo", ["foo"]]' %>% index %>% tojson %>% jq
#> "1" "\"foo\"" "[\"foo\"]"
'[1, "foo", ["foo"]]' %>% index %>% tojson %>% fromjson %>% jq
#> 1 "foo" ["foo"]

contains

'"foobar"' %>% contains("bar") %>% jq
#> true

unique

'[1,2,5,3,5,3,1,3]' %>% unique %>% jq
#> [1,2,3,5]

types

get type information for each element

'[0, false, [], {}, null, "hello"]' %>% types %>% jq
#> ["number","boolean","array","object","null","string"]
'[0, false, [], {}, null, "hello", true, [1,2,3]]' %>% types %>% jq
#> ["number","boolean","array","object","null","string","boolean","array"]

select elements by type

'[0, false, [], {}, null, "hello"]' %>% index() %>% type(booleans) %>% jq
#> false

key operations

get keys

str <- '{"foo": 5, "bar": 7}'
str %>% keys() %>% jq
#> ["bar","foo"]

delete by key name

str %>% del(bar) %>% jq
#> {"foo":5}

check for key existence

str3 <- '[[0,1], ["a","b","c"]]'
str3 %>% haskey(2) %>% jq
#> [false,true]
str3 %>% haskey(1,2) %>% jq
#> [true,false,true,true]

Select variables by name, and rename

'{"foo": 5, "bar": 7}' %>% select(a = .foo) %>% jq
#> {"a":5}

More complicated select(), using the included dataset githubcommits

githubcommits %>%
  index() %>% 
  select(sha = .sha, name = .commit.committer.name) %>% 
  jq(TRUE)
#> {"sha":["110e009996e1359d25b8e99e71f83b96e5870790"],"name":["Nicolas Williams"]}
#> {"sha":["7b6a018dff623a4f13f6bcd52c7c56d9b4a4165f"],"name":["Nicolas Williams"]}
#> {"sha":["a50e548cc5313c187483bc8fb1b95e1798e8ef65"],"name":["Nicolas Williams"]}
#> {"sha":["4b258f7d31b34ff5d45fba431169e7fd4c995283"],"name":["Nicolas Williams"]}
#> {"sha":["d1cb8ee0ad3ddf03a37394bfa899cfd3ddd007c5"],"name":["Nicolas Williams"]}

Maths

'{"a": 7}' %>%  do(.a + 1) %>% jq
#> 8
'{"a": [1,2], "b": [3,4]}' %>%  do(.a + .b) %>% jq
#> [1,2,3,4]
'{"a": [1,2], "b": [3,4]}' %>%  do(.a - .b) %>% jq
#> [1,2]
'{"a": 3}' %>%  do(4 - .a) %>% jq
#> 1
'["xml", "yaml", "json"]' %>%  do('. - ["xml", "yaml"]') %>% jq
#> ". - [\"xml\", \"yaml\"]"
'5' %>%  do(10 / . * 3) %>% jq
#> 6

comparisons

'[5,4,2,7]' %>% index() %>% do(. < 4) %>% jq
#> false false true false
'[5,4,2,7]' %>% index() %>% do(. > 4) %>% jq
#> true false false true
'[5,4,2,7]' %>% index() %>% do(. <= 4) %>% jq
#> false true true false
'[5,4,2,7]' %>% index() %>% do(. >= 4) %>% jq
#> true true false true
'[5,4,2,7]' %>% index() %>% do(. == 4) %>% jq
#> false true false false
'[5,4,2,7]' %>% index() %>% do(. != 4) %>% jq
#> true false true true

length

'[[1,2], "string", {"a":2}, null]' %>% index %>% length %>% jq
#> 2 6 1 0

sqrt

'9' %>% sqrt %>% jq
#> 3

floor

'3.14159' %>% floor %>% jq
#> 3

find minimum

'[5,4,2,7]' %>% min %>% jq
#> 2
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% min %>% jq
#> {"foo":2,"bar":3}
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% min(foo) %>% jq
#> {"foo":1,"bar":14}
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% min(bar) %>% jq
#> {"foo":2,"bar":3}

find maximum

'[5,4,2,7]' %>% max %>% jq
#> 7
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% max %>% jq
#> {"foo":1,"bar":14}
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% max(foo) %>% jq
#> {"foo":2,"bar":3}
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% max(bar) %>% jq
#> {"foo":1,"bar":14}

Combine into valid JSON

jq sometimes returns several pieces of JSON that are each valid on their own, but do not form a single valid JSON document when taken together. combine() joins them into one valid JSON array.

This outputs a few pieces of JSON

(x <- githubcommits %>% 
  index() %>%
  select(sha = .sha, name = .commit.committer.name) %>% 
  jq(TRUE))
#> {"sha":["110e009996e1359d25b8e99e71f83b96e5870790"],"name":["Nicolas Williams"]}
#> {"sha":["7b6a018dff623a4f13f6bcd52c7c56d9b4a4165f"],"name":["Nicolas Williams"]}
#> {"sha":["a50e548cc5313c187483bc8fb1b95e1798e8ef65"],"name":["Nicolas Williams"]}
#> {"sha":["4b258f7d31b34ff5d45fba431169e7fd4c995283"],"name":["Nicolas Williams"]}
#> {"sha":["d1cb8ee0ad3ddf03a37394bfa899cfd3ddd007c5"],"name":["Nicolas Williams"]}

Use combine() to put them together.

combine(x)
#> [{"sha":["110e009996e1359d25b8e99e71f83b96e5870790"],"name":["Nicolas Williams"]}, {"sha":["7b6a018dff623a4f13f6bcd52c7c56d9b4a4165f"],"name":["Nicolas Williams"]}, {"sha":["a50e548cc5313c187483bc8fb1b95e1798e8ef65"],"name":["Nicolas Williams"]}, {"sha":["4b258f7d31b34ff5d45fba431169e7fd4c995283"],"name":["Nicolas Williams"]}, {"sha":["d1cb8ee0ad3ddf03a37394bfa899cfd3ddd007c5"],"name":["Nicolas Williams"]}]

Streaming

Write mtcars to a temporary file

writeLines(jsonlite::toJSON(mtcars, collapse = FALSE),
             tmp <- tempfile())

Build a file iterator

it_f <- rivr::file_iterator(tmp)
it_j <- jq_iterator(it_f, '{cyl: ."cyl"}')
replicate(NROW(mtcars), it_j$yield())
#>  [1] "{\"cyl\":6}" "{\"cyl\":6}" "{\"cyl\":4}" "{\"cyl\":6}" "{\"cyl\":8}"
#>  [6] "{\"cyl\":6}" "{\"cyl\":8}" "{\"cyl\":4}" "{\"cyl\":4}" "{\"cyl\":6}"
#> [11] "{\"cyl\":6}" "{\"cyl\":8}" "{\"cyl\":8}" "{\"cyl\":8}" "{\"cyl\":8}"
#> [16] "{\"cyl\":8}" "{\"cyl\":8}" "{\"cyl\":4}" "{\"cyl\":4}" "{\"cyl\":4}"
#> [21] "{\"cyl\":4}" "{\"cyl\":8}" "{\"cyl\":8}" "{\"cyl\":8}" "{\"cyl\":8}"
#> [26] "{\"cyl\":4}" "{\"cyl\":4}" "{\"cyl\":4}" "{\"cyl\":8}" "{\"cyl\":6}"
#> [31] "{\"cyl\":8}" "{\"cyl\":4}"

the streaming bit is a work in progress

Meta

rofooter

Releases

No releases published

Packages

No packages published

Languages

  • R 87.3%
  • C++ 8.2%
  • Makefile 4.4%
  • Shell 0.1%