This is a continuation of the previous post “Compare storage of string in memory for c++ and golang using gdb”, where the gdb debugger was used to read the characters of a string variable directly from memory. The example below shows how to read the bytes of a string from memory using an unsafe pointer. The string is encoded in UTF-8 and contains both Latin and Cyrillic letters. In UTF-8 a basic Latin character occupies 1 byte, whereas a Cyrillic one has a 2-byte representation.
Here is the code:
// Command strunsafe demonstrates how the bytes of a Go string can be read
// straight from memory through an unsafe pointer, one UTF-8 character at a time.
package main

import (
	"fmt"
	"unicode/utf8"
	"unsafe"
)

// StringHeader mirrors the in-memory representation of a Go string value: a
// pointer to the first byte followed by the length in bytes. It relies on the
// layout used by the gc runtime (the same one reflect.StringHeader describes)
// and exists here purely for demonstration.
type StringHeader struct {
	Data unsafe.Pointer
	Len  int
}

// byteAt returns the byte stored offset bytes past base.
//
// The address arithmetic is kept inside a single expression on purpose: the
// unsafe rules do not allow an intermediate uintptr to be stored in a variable
// and converted back to a pointer later.
func byteAt(base unsafe.Pointer, offset int) byte {
	return *(*byte)(unsafe.Pointer(uintptr(base) + uintptr(offset)))
}

// sequenceLen reports how many bytes the UTF-8 sequence introduced by lead is
// expected to occupy (1 to 4). ASCII bytes, stray continuation bytes and bytes
// that can never start a sequence all count as a single byte.
func sequenceLen(lead byte) int {
	switch {
	case lead&0xE0 == 0xC0: // 110xxxxx
		return 2
	case lead&0xF0 == 0xE0: // 1110xxxx
		return 3
	case lead&0xF8 == 0xF0: // 11110xxx
		return 4
	default:
		return 1
	}
}

// charsViaPointer reads the bytes of s through a raw pointer to its backing
// storage and returns them grouped per UTF-8 character, in order.
//
// Bytes are fetched one at a time, so the result does not depend on CPU byte
// order or alignment. Reads never go past the length recorded in the string
// header; a truncated or malformed sequence is cut short at the first byte
// that is not a continuation byte, so concatenating the result always
// reproduces s exactly.
func charsViaPointer(s string) []string {
	hdr := (*StringHeader)(unsafe.Pointer(&s))
	chars := make([]string, 0, hdr.Len)
	for i := 0; i < hdr.Len; {
		end := i + sequenceLen(byteAt(hdr.Data, i))
		if end > hdr.Len {
			end = hdr.Len // sequence announced more bytes than the string holds
		}
		buf := make([]byte, 0, end-i)
		buf = append(buf, byteAt(hdr.Data, i))
		// Collect continuation bytes (10xxxxxx) only.
		for j := i + 1; j < end && byteAt(hdr.Data, j)&0xC0 == 0x80; j++ {
			buf = append(buf, byteAt(hdr.Data, j))
		}
		chars = append(chars, string(buf))
		i += len(buf)
	}
	return chars
}

func main() {
	str := "Hello Кириллица"            // Hello Cyrillic
	rCnt := utf8.RuneCountInString(str) // Number of letters
	sLen := len(str)                    // Number of bytes
	fmt.Printf("%s (Length in bytes=%d, Number of runes= %d)\n", str, sLen, rCnt)

	// Reinterpret the string variable as its header to expose pointer and length.
	ptrStr := (*StringHeader)(unsafe.Pointer(&str))
	fmt.Printf("Pointer to character sequence: 0x%x\n", ptrStr.Data)
	fmt.Printf("Number of bytes: %d\n", ptrStr.Len)

	fmt.Printf("From unsafe pointer: ")
	for _, ch := range charsViaPointer(str) {
		fmt.Print(ch)
	}
	fmt.Printf("\n")
}
Compilation:
go build strunsafe.go
Execution:
./strunsafe
Hello Кириллица (Length in bytes=24, Number of runes= 15)
Pointer to character sequence: 0x4c21d5
Number of bytes: 24
From unsafe pointer: Hello Кириллица