Note to self: Fixing encoding in Golang ascii85

Yesterday I spent a few hours dealing with what I like to call "the edges of StackOverflow". By that I mean those situations in which you are trying to solve a programming problem (mostly a bug) and you have no idea why it's happening, and even worse, no amount of searching (on StackOverflow or GitHub) yields any information that seems even somewhat related to the issue.

I think this xkcd strip puts it quite clearly:

xkcd

The issue in question was this. I am working on a project involving cookies. The standard procedure in programmatic media buying (i.e., online ads) is to encode the cookie data in ascii85 (also known as base85).

So I was implementing the encoding/decoding package using Golang's ascii85 package as follows:

package main

import (  
    "fmt"
    "encoding/json"
)




// User is the sample record that main serializes to JSON before it is
// ascii85-encoded into a cookie value.
type User struct {
	// Age is marshaled under the JSON key "Age".
	Age int
	// Interests is marshaled under the JSON key "Interests".
	Interests []string
}

// decodeCookie ascii85-decodes cookieValue and returns the decoded bytes as a
// string.
//
// The destination buffer has the same length as the input, decoding is run
// with flush=true, and both the consumed-byte count and the error returned
// by ascii85.Decode are discarded.
func decodeCookie(cookieValue string) string {
	src := []byte(cookieValue)
	dst := make([]byte, len(src))
	n, _, _ := ascii85.Decode(dst, src, true)
	return string(dst[:n])
}

// encodeCookie returns cookieValue encoded as ascii85.
//
// The output buffer is sized with ascii85.MaxEncodedLen and is returned in
// full; the byte count reported by ascii85.Encode is not used.
func encodeCookie(cookieValue string) string {
	raw := []byte(cookieValue)
	out := make([]byte, ascii85.MaxEncodedLen(len(raw)))
	ascii85.Encode(out, raw)
	return string(out)
}


// main round-trips a sample User through JSON and the ascii85 cookie helpers,
// printing the value at every stage and finally the result of unmarshaling
// the decoded JSON.
func main() {
	user := User{
		25,
		[]string{"music", "football"},
	}

	// The error is ignored on purpose: User only holds an int and a []string,
	// both of which encoding/json can always marshal.
	userJson, _ := json.Marshal(user)
	fmt.Println("User as json", string(userJson))

	userB85Encoded := encodeCookie(string(userJson))
	fmt.Println("User as jsonB85", userB85Encoded)

	userB85Decoded := decodeCookie(userB85Encoded)
	// Print the decoded JSON here, not the encoded cookie again.
	fmt.Println("User as json", userB85Decoded)

	decodedUser := User{}
	err := json.Unmarshal([]byte(userB85Decoded), &decodedUser)
	if err != nil {
		// Report the failure but keep going so the (empty) result is printed too.
		fmt.Println("Error deserializing json bytes", err)
	}

	fmt.Printf("Deserialized User:%v\n", decodedUser)
}

This code will print the following output :

User as json {"Age":25,"Interests":["music","football"]}  
User as jsonB85 HQkagAKj/j2(TqCDKKH1ATMs7,!&pPD09o6@j3HJAoDU0@UX(h,$fTs  
User as json {"Age":25,"Interests":["music","football"]}  
Error deserializing json bytes invalid character '\x00' after top-level value  
Deserialized User:{0 []}  

So we see that what we thought would be an easy encoding/decoding implementation (easy encoding, HA!) is failing for some reason. The error says:

Error deserializing json bytes invalid character '\x00' after top-level value

But where is that character? The character \x00 is the null byte, so when printed it does not show up in the output.

We can go further by checking the lengths of the original and decoded strings to see if there is a mismatch, by adding a few lines:

package main

import (  
    "fmt"
    "encoding/json"
)




// User is the sample record that main serializes to JSON before it is
// ascii85-encoded into a cookie value.
type User struct {
	// Age is marshaled under the JSON key "Age".
	Age int
	// Interests is marshaled under the JSON key "Interests".
	Interests []string
}

// decodeCookie ascii85-decodes cookieValue and returns the decoded bytes as a
// string.
//
// The destination buffer has the same length as the input, decoding is run
// with flush=true, and both the consumed-byte count and the error returned
// by ascii85.Decode are discarded.
func decodeCookie(cookieValue string) string {
	encoded := []byte(cookieValue)
	decoded := make([]byte, len(encoded))
	written, _, _ := ascii85.Decode(decoded, encoded, true)
	return string(decoded[:written])
}

// encodeCookie returns cookieValue encoded as ascii85.
//
// The output buffer is sized with ascii85.MaxEncodedLen and is returned in
// full; the byte count reported by ascii85.Encode is not used.
func encodeCookie(cookieValue string) string {
	plain := []byte(cookieValue)
	encoded := make([]byte, ascii85.MaxEncodedLen(len(plain)))
	ascii85.Encode(encoded, plain)
	return string(encoded)
}


// main round-trips a sample User through JSON and the ascii85 cookie helpers,
// printing the value at every stage, the result of unmarshaling the decoded
// JSON, and finally the byte lengths before and after the round trip.
func main() {
	sample := User{Age: 25, Interests: []string{"music", "football"}}

	originalJSON, _ := json.Marshal(sample)
	fmt.Println("User as json", string(originalJSON))

	encoded := encodeCookie(string(originalJSON))
	fmt.Println("User as jsonB85", encoded)

	decodedJSON := decodeCookie(encoded)
	fmt.Println("User as json", decodedJSON)

	var decodedUser User
	if err := json.Unmarshal([]byte(decodedJSON), &decodedUser); err != nil {
		fmt.Println("Error deserializing json bytes", err)
	}

	summary := fmt.Sprintf("Deserialized User:%v", decodedUser)
	fmt.Println(summary)

	// Added in this revision: compare the lengths before and after the
	// encode/decode round trip.
	fmt.Println("length of original json string", len(originalJSON))
	fmt.Println("length of decoded json string", len(decodedJSON))
}

Now the two last lines of output will show:

length of original json string 43  
length of decoded json string 44  

So we see that there is a difference between the original and the decoded string! How is that possible?

The only hint I found about why this might be happening is in the ridiculously succinct (as usual) ascii85 Go documentation:

|[...] The encoding handles 4-byte chunks, using a special encoding for the last fragment[...]

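So what if the issue is that, because the length of the input to encodeCookie (the JSON string) is not a multiple of 4, ascii85 pads it with null bytes up to the nearest multiple, turning a 43-byte array into a 44-byte array? (More precisely: ascii85.Encode returns the number of bytes that make up the real encoding — 54 here — but encodeCookie discards that count and keeps the whole 55-byte buffer, so the cookie carries one extra digit, which decodes to a trailing zero byte.)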

We can fix this by removing the null bytes from the output byte array, using the convenient bytes.Trim function:

package main

import (  
    "fmt"
    "bytes"
    "encoding/json"
    "encoding/ascii85"
)




// User is the sample record that main serializes to JSON before it is
// ascii85-encoded into a cookie value.
type User struct {
	// Age is marshaled under the JSON key "Age".
	Age int
	// Interests is marshaled under the JSON key "Interests".
	Interests []string
}

// decodeCookie ascii85-decodes cookieValue and returns the decoded bytes as a
// string.
//
// Trailing NUL bytes are stripped from the result, because an encoder that
// emits the complete five-digit final group (instead of the shortened form)
// makes the decoder produce zero padding after the real payload. Leading NUL
// bytes are never padding and are preserved. A payload that genuinely ends in
// NUL bytes therefore loses them.
//
// It returns "" when cookieValue is not valid ascii85.
func decodeCookie(cookieValue string) string {
	src := []byte(cookieValue)

	// ascii85.Decode stops early, without reporting an error, as soon as fewer
	// than four bytes of dst remain. A single 'z' expands to four zero bytes,
	// so 4*len(src) is the worst-case output size and guarantees the whole
	// input is consumed, including very short cookies.
	dst := make([]byte, 4*len(src))

	n, _, err := ascii85.Decode(dst, src, true)
	if err != nil {
		// Corrupt input: do not hand back a partial payload.
		return ""
	}

	return string(bytes.TrimRight(dst[:n], "\x00"))
}

// encodeCookie returns cookieValue encoded as ascii85.
//
// The output buffer is sized with ascii85.MaxEncodedLen and is returned in
// full; the byte count reported by ascii85.Encode is not used.
func encodeCookie(cookieValue string) string {
	raw := []byte(cookieValue)
	out := make([]byte, ascii85.MaxEncodedLen(len(raw)))
	ascii85.Encode(out, raw)
	return string(out)
}

func main() {  
    user := User{
          25, 
          []string{"music", "football"},
    }

    userOriginalJson, _ := json.Marshal(user) 
    fmt.Println("User as json", string(userOriginalJson))

    userB85Encoded := encodeCookie(string(userOriginalJson))
    fmt.Println("User as jsonB85", userB85Encoded)


    userB85DecodedJson := decodeCookie(userB85Encoded)
    fmt.Println("User as json", userB85DecodedJson)

    decodedUser := User{}
    err := json.Unmarshal([]byte(userB85DecodedJson), &decodedUser)
    if err != nil {
        fmt.Println("Error deserializing json bytes", err)
    }

   fmt.Println(fmt.Sprintf("Deserialized User:%v", decodedUser))

   //NOW WE ADD THESE LINES

   fmt.Println("length of original json string", len(userOriginalJson))
   fmt.Println("length of decoded json string", len(userB85DecodedJson))

Here is a Go playground link to the code above.

Now the output is as expected:

User as json {"Age":25,"Interests":["music","football"]}  
User as jsonB85 HQkagAKj/j2(TqCDKKH1ATMs7,!&pPD09o6@j3HJAoDU0@UX(h,$fTs  
User as json {"Age":25,"Interests":["music","football"]}  
Deserialized User:{25 [music football]}  
length of original json string 43  
length of decoded json string 43  

And that fixes the issue! I hope that in the future the Golang community will focus a bit more on documentation and examples.

That's all, thanks for reading!