SQL quoting

John Mount

2019-09-15

rquery does not parse expression of the SQL node. Thus it does not know which tokens are string-literals, and which are column names. To assist with this user can annotate column names with .[].

library("rquery")

date_cutoff <- '2017-04-02'

td <- mk_td("df", 
            c("cust",
              "trans_date",
              "sales"))

ops <- td %.>%
  select_rows_se(
    ., 
    qe(trans_date <=  str_to_date(.(date_cutoff), '%Y-%m-%d'))) %.>%
  sql_node(
    .,
    qae(max_date = max(.[trans_date]),
        const_col1 = "a'",
        const_col2 = 'a"'),
    mods = "GROUP BY .[cust]",
    orig_columns = FALSE)

cat(format(ops))
## mk_td("df", c(
##   "cust",
##   "trans_date",
##   "sales")) %.>%
##  select_rows(.,
##    trans_date <= str_to_date("2017-04-02", "%Y-%m-%d")) %.>%
##  sql_node(.,
##           max_date %:=% max( trans_date )
##              const_col1 %:=% "a'"
##              const_col2 %:=% "a\"";
##            GROUP BY  "cust",
##              *=FALSE)

Notice how this renders into different SQL for each of our two database descriptions.

db1 <- rquery_db_info(
  identifier_quote_char = "'",
  string_quote_char = '"')

cat(to_sql(ops, db1))
## SELECT
##  max( 'trans_date' ) AS 'max_date',
##  "a'" AS 'const_col1',
##  "a\"" AS 'const_col2'
## FROM (
##  SELECT * FROM (
##   SELECT
##    'cust',
##    'trans_date',
##    'sales'
##   FROM
##    'df'
##  ) tsql_98439044642113206064_0000000000
##  WHERE 'trans_date' <= str_to_date ( "2017-04-02" , "%Y-%m-%d" )
## ) tsql_98439044642113206064_0000000001 GROUP BY  'cust'
db2 <- rquery_db_info(
  identifier_quote_char = '"',
  string_quote_char = "'")
  
cat(to_sql(ops, db2))
## SELECT
##  max( "trans_date" ) AS "max_date",
##  'a'' AS "const_col1",
##  'a\'' AS "const_col2"
## FROM (
##  SELECT * FROM (
##   SELECT
##    "cust",
##    "trans_date",
##    "sales"
##   FROM
##    "df"
##  ) tsql_69971034639503336613_0000000000
##  WHERE "trans_date" <= str_to_date ( '2017-04-02' , '%Y-%m-%d' )
## ) tsql_69971034639503336613_0000000001 GROUP BY  "cust"

Notice our included quote characters were translated. This is an unwanted side-effect of using the qae() shortcut which does not tokenize, so it does not know which quotes are in which roles in the expression.

To override quote translation in the sql_node() (which is simple string substitution without quote context or escaping) you must use the clunkier “build up a string as a list of tokens” (where sub-listed items are string literals and names are column names).

ops <- td %.>%
  select_rows_se(
    ., 
    qe(trans_date <=  str_to_date(.(date_cutoff), '%Y-%m-%d'))) %.>%
  sql_node(
    .,
    list(
      "max_date" %:=% "max(.[trans_date])",
      "const_col1" = list(list("a'")),
      "const_col2" = list(list('b"'))),
    mods = "GROUP BY .[cust]",
    orig_columns = FALSE)

cat(format(ops))
## mk_td("df", c(
##   "cust",
##   "trans_date",
##   "sales")) %.>%
##  select_rows(.,
##    trans_date <= str_to_date("2017-04-02", "%Y-%m-%d")) %.>%
##  sql_node(.,
##            %:=% max( trans_date )
##              const_col1 %:=% list("a'")
##              const_col2 %:=% list("b\"");
##            GROUP BY  "cust",
##              *=FALSE)
cat(to_sql(ops, db1))
## SELECT
##  NA,
##  "a'" AS 'const_col1',
##  "b\"" AS 'const_col2'
## FROM (
##  SELECT * FROM (
##   SELECT
##    'cust',
##    'trans_date',
##    'sales'
##   FROM
##    'df'
##  ) tsql_51574324830299520888_0000000000
##  WHERE 'trans_date' <= str_to_date ( "2017-04-02" , "%Y-%m-%d" )
## ) tsql_51574324830299520888_0000000001 GROUP BY  'cust'
cat(to_sql(ops, db2))
## SELECT
##  NA,
##  'a\'' AS "const_col1",
##  'b"' AS "const_col2"
## FROM (
##  SELECT * FROM (
##   SELECT
##    "cust",
##    "trans_date",
##    "sales"
##   FROM
##    "df"
##  ) tsql_83314287335586063464_0000000000
##  WHERE "trans_date" <= str_to_date ( '2017-04-02' , '%Y-%m-%d' )
## ) tsql_83314287335586063464_0000000001 GROUP BY  "cust"

Quotes within string literals are going to be a nightmare moving from db to db, so our advice is to try to avoid them.