Note to self: Fixing encoding in Golang ascii85
Yesterday I spent a few hours dealing with what I like to call "the edges of StackOverflow". By that I mean those situations in which you are trying to solve a programming problem (mostly a bug), you have no idea why it's happening and, even worse, no amount of searching (on StackOverflow or GitHub) yields any information that seems even somewhat related to the issue.
I think this xkcd strip puts it quite clearly:
The issue in question was this. I am working on a project involving cookies. The standard procedure in programmatic media buying (i.e., online ads) is to encode the cookie data in ascii85 (or base85).
So I was implementing the encoding/decoding package using Golang's ascii85 package as follows:
package main
import (
	"encoding/ascii85"
	"encoding/json"
	"fmt"
)
// User is the sample record that main marshals to JSON and passes through the
// cookie encode/decode helpers.
type User struct {
	// Both fields are exported so encoding/json includes them. main fills the
	// struct with an unkeyed literal, so the field order matters.
	Age       int
	Interests []string
}
// decodeCookie decodes an ascii85 (base85) encoded cookie value and returns
// the decoded bytes as a string.
//
// The string-only signature cannot report an error, so corrupt input yields
// the empty string.
func decodeCookie(cookieValue string) string {
	src := []byte(cookieValue)
	// ascii85.Decode stops early, without an error, once fewer than four free
	// bytes remain in dst, and a single 'z' in the input expands to four zero
	// bytes. Four bytes of room per input byte is therefore the worst case;
	// a buffer of only len(src) loses values shorter than four characters and
	// truncates values that contain 'z' groups.
	dst := make([]byte, 4*len(src))
	n, _, err := ascii85.Decode(dst, src, true)
	if err != nil {
		return ""
	}
	return string(dst[:n])
}
// encodeCookie returns cookieValue in ascii85 (base85) form.
//
// The output buffer is sized with ascii85.MaxEncodedLen and converted to a
// string in full; the byte count reported by ascii85.Encode is not used.
func encodeCookie(cookieValue string) string {
	raw := []byte(cookieValue)
	out := make([]byte, ascii85.MaxEncodedLen(len(raw)))
	ascii85.Encode(out, raw)
	return string(out)
}
// main round-trips a sample User through JSON and the ascii85 cookie helpers,
// printing every intermediate value along the way.
func main() {
	user := User{
		25,
		[]string{"music", "football"},
	}
	// Marshal cannot fail for a struct holding only an int and a []string.
	userJson, _ := json.Marshal(user)
	fmt.Println("User as json", string(userJson))
	userB85Encoded := encodeCookie(string(userJson))
	fmt.Println("User as jsonB85", userB85Encoded)
	userB85Decoded := decodeCookie(userB85Encoded)
	// Show the decoded value (not the encoded one) so it can be compared with
	// the first line of output.
	fmt.Println("User as json", userB85Decoded)
	decodedUser := User{}
	err := json.Unmarshal([]byte(userB85Decoded), &decodedUser)
	if err != nil {
		fmt.Println("Error deserializing json bytes", err)
	}
	// Printed even after a failed Unmarshal so the zero-valued result is visible.
	fmt.Printf("Deserialized User:%v\n", decodedUser)
}
This code will print the following output :
User as json {"Age":25,"Interests":["music","football"]}
User as jsonB85 HQkagAKj/j2(TqCDKKH1ATMs7,!&pPD09o6@j3HJAoDU0@UX(h,$fTs
User as json {"Age":25,"Interests":["music","football"]}
Error deserializing json bytes invalid character '\x00' after top-level value
Deserialized User:{0 []}
So we see that what we thought would be an easy encoding/decoding (easy encoding, HA!) implementation is failing for some reason. The error says:
Error deserializing json bytes invalid character '\x00' after top-level value
But where is that character? The character \x00
is the null byte, so when printed it does not show up in the output.
We can go further by checking the lengths of the original and decoded strings to see if there is a mismatch, by adding a few lines:
package main
import (
"fmt"
"encoding/json"
)
// User is the sample record that main marshals to JSON and passes through the
// cookie encode/decode helpers.
type User struct {
	// Both fields are exported so encoding/json includes them. main fills the
	// struct with an unkeyed literal, so the field order matters.
	Age       int
	Interests []string
}
// decodeCookie decodes an ascii85 (base85) encoded cookie value and returns
// the decoded bytes as a string.
//
// The string-only signature cannot report an error, so corrupt input yields
// the empty string.
func decodeCookie(cookieValue string) string {
	encoded := []byte(cookieValue)
	// ascii85.Decode returns early, without an error, as soon as fewer than
	// four free bytes remain in the destination, and every 'z' character
	// expands to four zero bytes. Reserving four bytes per input byte covers
	// the worst case; a destination of only len(encoded) drops values shorter
	// than four characters and cuts off values containing 'z' groups.
	decoded := make([]byte, 4*len(encoded))
	written, _, err := ascii85.Decode(decoded, encoded, true)
	if err != nil {
		return ""
	}
	return string(decoded[:written])
}
// encodeCookie converts cookieValue to its ascii85 (base85) representation.
//
// It allocates ascii85.MaxEncodedLen bytes for the result and returns that
// entire buffer as a string; the count returned by ascii85.Encode is ignored.
func encodeCookie(cookieValue string) string {
	plain := []byte(cookieValue)
	encoded := make([]byte, ascii85.MaxEncodedLen(len(plain)))
	ascii85.Encode(encoded, plain)
	return string(encoded)
}
// main round-trips a sample User through JSON and the ascii85 cookie helpers,
// then reports the byte length of the JSON before encoding and after decoding.
func main() {
	sample := User{25, []string{"music", "football"}}

	originalJSON, _ := json.Marshal(sample)
	fmt.Println("User as json", string(originalJSON))

	encoded := encodeCookie(string(originalJSON))
	fmt.Println("User as jsonB85", encoded)

	decodedJSON := decodeCookie(encoded)
	fmt.Println("User as json", decodedJSON)

	var decodedUser User
	if err := json.Unmarshal([]byte(decodedJSON), &decodedUser); err != nil {
		fmt.Println("Error deserializing json bytes", err)
	}
	summary := fmt.Sprintf("Deserialized User:%v", decodedUser)
	fmt.Println(summary)

	// Added in this step: compare the size of the JSON before encoding with
	// the size of what came back out of the decoder.
	fmt.Println("length of original json string", len(originalJSON))
	fmt.Println("length of decoded json string", len(decodedJSON))
}
Now the two last lines of output will show:
length of original json string 43
length of decoded json string 44
So we see that there is a difference between the original and the decoded string! How is that possible?
The only hint I found about why this might be happening is in the ridiculously succinct (as usual) ascii85 Go documentation:
|[...] The encoding handles 4-byte chunks, using a special encoding for the last fragment[...]
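So what if the issue is that, because the input to encodeCookie (the 43-byte json string) is not a multiple of 4, ascii85 pads the last chunk with null values up to the nearest multiple, turning a 43 length byte array into a 44 length byte array?
That is close to what happens: ascii85.Encode pads the final 3-byte fragment with a zero byte, fills a whole 5-character group in the output buffer, and returns the number of characters that actually belong to the encoding (54). encodeCookie ignores that return value and keeps all 55 characters of the MaxEncodedLen-sized buffer, so the decoder sees a complete final group and produces a 44th byte, \x00.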
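We can work around this by removing the null bytes from the decoded byte array, using the convenient bytes.Trim function (the cleaner fix is to keep only the first n bytes of the encode buffer, where n is the value returned by ascii85.Encode):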
package main
import (
"fmt"
"bytes"
"encoding/json"
"encoding/ascii85"
)
// User is the sample record that main marshals to JSON and passes through the
// cookie encode/decode helpers.
type User struct {
	// Both fields are exported so encoding/json includes them. main fills the
	// struct with an unkeyed literal, so the field order matters.
	Age       int
	Interests []string
}
// decodeCookie decodes an ascii85 (base85) encoded cookie value, strips NUL
// bytes from both ends of the result and returns it as a string.
//
// The string-only signature cannot report an error, so corrupt input yields
// the empty string.
func decodeCookie(cookieValue string) string {
	src := []byte(cookieValue)
	// ascii85.Decode stops early, without an error, once fewer than four free
	// bytes remain in dst, and a single 'z' in the input expands to four zero
	// bytes. Four bytes of room per input byte is therefore the worst case;
	// a buffer of only len(src) loses values shorter than four characters and
	// truncates values that contain 'z' groups.
	dst := make([]byte, 4*len(src))
	n, _, err := ascii85.Decode(dst, src, true)
	if err != nil {
		return ""
	}
	// A cookie built from a whole MaxEncodedLen-sized buffer carries one
	// surplus character that decodes to a trailing \x00; strip it. Note that
	// bytes.Trim removes NUL bytes at both ends, so a payload that really
	// starts or ends with \x00 does not survive the round trip.
	return string(bytes.Trim(dst[:n], "\x00"))
}
// encodeCookie returns cookieValue in ascii85 (base85) form.
func encodeCookie(cookieValue string) string {
	raw := []byte(cookieValue)
	// MaxEncodedLen is only an upper bound. For a final fragment shorter than
	// four bytes Encode still fills a whole five-character group in the buffer
	// but counts fewer characters, so only the first n bytes are the encoding.
	// Returning the whole buffer appends a surplus character that decodes to
	// an extra \x00 byte.
	buf := make([]byte, ascii85.MaxEncodedLen(len(raw)))
	n := ascii85.Encode(buf, raw)
	return string(buf[:n])
}
func main() {
user := User{
25,
[]string{"music", "football"},
}
userOriginalJson, _ := json.Marshal(user)
fmt.Println("User as json", string(userOriginalJson))
userB85Encoded := encodeCookie(string(userOriginalJson))
fmt.Println("User as jsonB85", userB85Encoded)
userB85DecodedJson := decodeCookie(userB85Encoded)
fmt.Println("User as json", userB85DecodedJson)
decodedUser := User{}
err := json.Unmarshal([]byte(userB85DecodedJson), &decodedUser)
if err != nil {
fmt.Println("Error deserializing json bytes", err)
}
fmt.Println(fmt.Sprintf("Deserialized User:%v", decodedUser))
//NOW WE ADD THESE LINES
fmt.Println("length of original json string", len(userOriginalJson))
fmt.Println("length of decoded json string", len(userB85DecodedJson))
Here is a Go Playground link to the code above.
Now the output is as expected:
User as json {"Age":25,"Interests":["music","football"]}
User as jsonB85 HQkagAKj/j2(TqCDKKH1ATMs7,!&pPD09o6@j3HJAoDU0@UX(h,$fTs
User as json {"Age":25,"Interests":["music","football"]}
Deserialized User:{25 [music football]}
length of original json string 43
length of decoded json string 43
And that fixes the issue! I hope that in the future the Golang community will focus a bit more on documentation and examples.
That's all, thanks for reading!