This is a continuation of the previous post “Compare storage of string in memory for c++ and golang using gdb”, where the gdb debugger was used to read the characters of a string variable directly from memory. The example below shows how to read string bytes from memory using an unsafe pointer. The string encoding is UTF-8, and the string contains Latin and Cyrillic letters. In UTF-8, a Latin character uses 1 byte, but a Cyrillic one has a 2-byte representation.
Here is the code:
package main
import (
"fmt"
"unsafe"
"unicode/utf8"
)
// main prints a sample string, then reinterprets the string variable as a
// (pointer, length) header and rebuilds the same text by reading its bytes
// straight from memory through an unsafe pointer.
func main() {
	// StringHeader is overlaid on the string variable below. The cast relies
	// on a string value being laid out as a data pointer followed by a
	// length, which is how this program has always interpreted it.
	type StringHeader struct {
		Data unsafe.Pointer
		Len  int
	}

	str := "Hello Кириллица"            // Hello Cyrillic
	rCnt := utf8.RuneCountInString(str) // Number of letters
	sLen := len(str)                    // Number of bytes
	fmt.Printf("%s (Length in bytes=%d, Number of runes= %d)\n", str, sLen, rCnt)

	ptrStr := (*StringHeader)(unsafe.Pointer(&str))
	dataStr := ptrStr.Data
	lenStr := ptrStr.Len
	fmt.Printf("Pointer to character sequence: 0x%x\n", dataStr)
	fmt.Printf("Number of bytes: %d\n", lenStr)

	fmt.Printf("From unsafe pointer: ")
	for _, ch := range charsFromMemory(dataStr, lenStr) {
		fmt.Printf("%s", ch)
	}
	fmt.Printf("\n")
}

// charsFromMemory reads n bytes starting at data and splits them into UTF-8
// sequences, returning one string per character in memory order.
//
// The length of each sequence is taken from its lead byte, so 1- to 4-byte
// characters are all handled. Bytes are fetched one at a time, which keeps the
// result independent of CPU byte order and alignment. A sequence that would
// run past n is truncated so that no byte outside [data, data+n) is read.
// When n is 0, data is never dereferenced and may be nil.
func charsFromMemory(data unsafe.Pointer, n int) []string {
	chars := make([]string, 0, n)
	for off := 0; off < n; {
		size := utf8SeqLen(byteAt(data, off))
		if off+size > n {
			// Truncated trailing sequence: stay inside the buffer.
			size = n - off
		}
		seq := make([]byte, size)
		for k := range seq {
			seq[k] = byteAt(data, off+k)
		}
		chars = append(chars, string(seq))
		off += size
	}
	return chars
}

// utf8SeqLen reports how many bytes the UTF-8 sequence introduced by lead
// occupies, judging only by the lead byte's high bits. Continuation bytes and
// invalid lead bytes report 1 so that a caller scanning a buffer always
// advances.
func utf8SeqLen(lead byte) int {
	switch {
	case lead&0x80 == 0x00: // 0xxxxxxx: single byte (ASCII)
		return 1
	case lead&0xE0 == 0xC0: // 110xxxxx: two bytes (e.g. Cyrillic)
		return 2
	case lead&0xF0 == 0xE0: // 1110xxxx: three bytes
		return 3
	case lead&0xF8 == 0xF0: // 11110xxx: four bytes
		return 4
	default:
		return 1
	}
}

// byteAt returns the byte stored off bytes past base.
func byteAt(base unsafe.Pointer, off int) byte {
	// The uintptr arithmetic and the conversion back to unsafe.Pointer are
	// kept in a single expression; the address is never held in a uintptr
	// variable between statements.
	return *(*byte)(unsafe.Pointer(uintptr(base) + uintptr(off)))
}
Compilation:
go build strunsafe.go
Execution:
./strunsafe
Hello Кириллица (Length in bytes=24, Number of runes= 15)
Pointer to character sequence: 0x4c21d5
Number of bytes: 24
From unsafe pointer: Hello Кириллица