SlideShare a Scribd company logo
1 of 35
Processing XML and Spreadsheet in Go
续 日
Gopher China Conference
Beijing 2021 6/26 - 6/27
Self Introduction
The author of Excelize, a Go language spreadsheet library. Familiar
with Go language programming, middleware, and big data solutions.
Working Experiences
Alibaba Group - Software Engineer
Baidu Inc. - Software Engineer
Qihoo 360 – Server-side Software Engineer
GitHub: @xuri
Twitter: @xurime
Blog: https://xuri.me
Agenda
Serialize and Deserialize 01
• Document Object Model
• Event-driven (Simple API for XML)
• Serialize and Deserialize Control
Handle Complex XML 02
• Partial Load
• Namespace & Entity
• Ser/Deserialize Idempotence
High Performance Processing 03
• XML Schema Definition
• DOM or SAX
OOXML Spreadsheets 04
• Excel XML Specification
• Charset Encoding
• Streaming I/O
Serialize and Deserialize
Document Object Model
<?xml version="1.0" encoding="utf-8"?>
<Person>
<Name>Tom</Name>
<Email where="home">
<Addr>tom@example.com</Addr>
</Email>
</Person>
type Person struct {
Name string
Email struct {
Where string `xml:"where,attr"`
Addr string
}
}
encoding/xml
// Decode the whole document into a Person value in a single call.
var p Person
if err := xml.Unmarshal([]byte(data), &p); err != nil {
	fmt.Println(err)
}
// %+v prints field names alongside values; the trailing newline ends the line.
fmt.Printf("%+v\n", p)
// {Name:Tom Email:{Where:home Addr:tom@example.com}}
XML Finite State Machine
0
start start tag
NAME
TEXT
equal
end tag
value
value
end value
COMMENT
version
blank/enter
letter
digit
<
?
?>
-->
!--
= " "
' '
>
blank
/
letter
>
blank
Unmarshal
Unmarshal
NewDecoder
Decoder.unmarshal Decoder.switchToReader
unmarshalPath
unmarshalAttr unmarshalInterface
unmarshalTextInterface Decoder.RawToken
Decoder.pushElement Decoder.pushNs
encoding/xml
marshal.go
typeinfo.go
xml.go
read.go
example & test
Go XML Parser
type Decoder struct {
Strict bool
AutoClose []string
Entity map[string]string
CharsetReader func(charset string, input io.Reader) (io.Reader, error)
DefaultSpace string
r io.ByteReader
t TokenReader
buf bytes.Buffer
saved *bytes.Buffer
stk *stack
free *stack
needClose bool
toClose Name
nextToken Token
nextByte int
ns map[string]string
err error
line int
offset int64
unmarshalDepth int
}
encoding/xml:xml.go
StartElement
EndElement
CharData
Comment
ProcInst
Directive
Event-driven (Simple API for XML)
// Walk the document token by token and print every start and end element
// in the order the decoder reports them.
decoder := xml.NewDecoder(strings.NewReader(data))
for {
	// The error is deliberately ignored: Token returns a nil token both at
	// end of input and on a decode error, so the nil check ends the loop.
	token, _ := decoder.Token()
	if token == nil {
		break
	}
	switch element := token.(type) {
	case xml.StartElement, xml.EndElement:
		fmt.Printf("%+v\n", element)
	}
}
<?xml version="1.0" encoding="utf-8"?>
<Person>
<Name>Tom</Name>
<Email where="home">
<Addr>tom@example.com</Addr>
</Email>
</Person>
{Name:{Space: Local:Person} Attr:[]}
{Name:{Space: Local:Name} Attr:[]}
{Name:{Space: Local:Name}}
{Name:{Space: Local:Email} Attr:[{Name:{Space: Local:where} Value:home}]}
{Name:{Space: Local:Addr} Attr:[]}
{Name:{Space: Local:Addr}}
{Name:{Space: Local:Email}}
{Name:{Space: Local:Person}}
Serialize and Deserialize Control
switch flag {
case "attr":
finfo.flags |= fAttr
case "cdata":
finfo.flags |= fCDATA
case "chardata":
finfo.flags |= fCharData
case "innerxml":
finfo.flags |= fInnerXML
case "comment":
finfo.flags |= fComment
case "any":
finfo.flags |= fAny
case "omitempty":
finfo.flags |= fOmitEmpty
}
encoding/xml:typeinfo.go
type Person struct {
Name string
Email struct {
Where string `xml:"where,attr,omitempty"`
Addr string
}
}
attribute with the field name in the XML element
written as character data, not as an XML element
written as character data wrapped in one or more <![CDATA[ ... ]]> tags
written verbatim, not subject to the usual marshaling procedure
unmatched rule, maps the sub-element to that struct field
omitted if the field value is empty
Partial Load
<?xml version="1.0" encoding="utf-8"?>
<Person>
<Name>Tom</Name>
<Email>
<Addr>tom@example.com</Addr>
</Email>
</Person>
// Person maps the <Person> document; Email is not decoded field by field
// but captured through partialXML.
type Person struct {
Name string
Email partialXML
}
// partialXML keeps the raw XML nested inside an element as a string
// (the ",innerxml" flag), so that part of the document is loaded verbatim.
type partialXML struct {
Content string `xml:",innerxml"`
}
var p Person
err := xml.Unmarshal([]byte(data), &p)
if err != nil {
fmt.Println(err)
}
fmt.Printf("%+vn", p)
{Name:Tom Email:{Content:
<Addr>tom@example.com</Addr>
}}
Handle Complex XML
Datatypes
Go Datatypes XML Datatypes
string
anyType, ENTITY,ID, IDREF, NCName,
NMTOKEN, Name, anyURI, duration,
language, normalizedString, string, token,
xml:lang, xml:space, xml:base,xml:id
[]string ENTITIES, IDREFS, NMTOKENS, NOTATION
xml.Name QName
[]byte base64Binary, hexBinary, unsignedByte
bool boolean
byte byte
float64 decimal, double, float,
int64
int, integer, long, negativeInteger,
nonNegativeInteger, nonPositiveInteger,
positiveInteger, short
uint64 unsignedInt, unsignedLong, unsignedShort
time.Time
date, dateTime, gDay, gMonth, gMonthDay,
gYear, gYearMonth,time
anyType
anySimpleType
all complex types
gYearMonth gYear gMonthDay gDay gMonth
date
time
dateTime
duration
boolean base64Binary hexBinary float double anyURI QName NOTATION
decimal
string
normalizedString integer
token long nonNegativeInteger
nonPositiveInteger
language Name NMTOKEN negativeInteger int unsignedLong positiveInteger
NCName NMTOKENS short unsignedInt
ID IDREF ENTITY
ENTITIES
IDREFS
byte unsignedShort
unsignedByte
ur types
built-in primitive types
built-in derived types
complex types
derived by restriction
Derived by list
Derived by extension or
restriction
Entity
XML Entity
exp := `<!ENTITYs+([^s]+)s+"([^"]+)">`
entities := map[string]string{}
d := xml.NewDecoder(strings.NewReader(input))
var rEntity = regexp.MustCompile(exp)
for {
tok, err := d.Token()
if err != nil {
break
}
dir, ok := tok.(xml.Directive)
if !ok {
continue
}
fmt.Println(string(dir))
for _, m := range rEntity.FindAllSubmatch(dir,-1) {
entities[string(m[1])] = string(m[2])
}
}
fmt.Println("entities", entities)
type Person struct {
XMLName xml.Name `xml:"person"`
Name string `xml:"name"`
Address string `xml:"address"`
}
entities map[email:tom@example.com name:Tom]
Get Entity
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE person[
<!ENTITY name "Tom">
<!ENTITY email "tom@example.com">
]>
<person>
<name>&name;</name>
<address>&email;</address>
</person>
Entity
XML Entity
// Decode the document again, this time supplying the collected entity map
// so that references such as &name; are replaced while decoding.
d = xml.NewDecoder(strings.NewReader(input))
d.Strict = false
d.Entity = entities
if err := d.Decode(&v); err != nil {
	fmt.Printf("error: %v", err)
	return
}
fmt.Printf("%+v\n", v)
{XMLName:{Space: Local:person} Name:Tom Address:tom@example.com}
Decode with Entity
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE person[
<!ENTITY name "Tom">
<!ENTITY email "tom@example.com">
]>
<person>
<name>&name;</name>
<address>&email;</address>
</person>
type Person struct {
XMLName xml.Name `xml:"person"`
Name string `xml:"name"`
Address string `xml:"address"`
}
Namespace & Ser/Deserialize Idempotence
<?xml version="1.0" encoding="utf-8"?>
<person
xmlns="http://example.com/default"
xmlns:m="http://example.com/main"
xmlns:h="http://example.com/home"
xmlns:w="http://example.com/work">
<name>Tom</name>
<m:email h:addr="HOME" w:addr="WORK" />
</person>
// Person maps the namespaced <person> document. Each tag is written as
// "namespace-URL local-name", so an element or attribute is matched by its
// namespace as well as its local name.
type Person struct {
XMLName xml.Name `xml:"http://example.com/default person"`
Name string `xml:"name"`
// Email is matched through its own XMLName; the two addr attributes share
// a local name and are told apart only by their namespaces.
Email struct {
XMLName xml.Name `xml:"http://example.com/main email"`
HomeAddr string `xml:"http://example.com/home addr,attr"`
WorkAddr string `xml:"http://example.com/work addr,attr"`
} // TAG NOT HERE: `xml:"email"`
}
<person xmlns="http://example.com/default">
<name>Tom</name>
<email xmlns="http://example.com/main"
xmlns:home="http://example.com/home"
home:addr="HOME"
xmlns:work="http://example.com/work"
work:addr="WORK"></email>
</person>
Inline Namespace Declare
Root Element
NS Missing!
Namespace Local Name
Ser/Deserialize Idempotence
encoding/xml:xml.go
type Token interface{}
type EndElement struct {
Name Name
}
type Name struct {
Space, Local string
}
type Attr struct {
Name Name
Value string
}
type StartElement struct {
Name Name
Attr []Attr
}
// getRootEleAttr returns the attributes of the first start element read
// from the given XML decoder. It returns nil when the input ends, or
// decoding fails, before any start element is seen.
func getRootEleAttr(d *xml.Decoder) []xml.Attr {
	for {
		tok, _ := d.Token()
		if tok == nil {
			// A nil token means end of input or a decode error.
			return nil
		}
		if root, isStart := tok.(xml.StartElement); isStart {
			return root.Attr
		}
	}
}
Ser/Deserialize Idempotence
<?xml version="1.0" encoding="utf-8"?>
<person
xmlns="http://example.com/default"
xmlns:m="http://example.com/main"
xmlns:h="http://example.com/home"
xmlns:w="http://example.com/work">
<name>Tom</name>
<m:email h:addr="HOME" w:addr="WORK" />
</person>
decoder := xml.NewDecoder(strings.NewReader(data))
marshalXML := ""
for {
token, _ := decoder.Token()
if token == nil {
break
}
switch element := token.(type) {
case xml.StartElement:
for _, attr := range element.Attr {
if element.Name.Local == "person" {
colon := ""
if attr.Name.Space != "" {
colon = ":"
}
marshalXML += fmt.Sprintf("%s%s%s="%s" ",
attr.Name.Space, colon,
attr.Name.Local, attr.Value)
}
}
}
}
fmt.Printf("<person %s>n", marshalXML)
<person xmlns="http://example.com/default"
xmlns:m="http://example.com/main"
xmlns:h="http://example.com/home"
xmlns:w="http://example.com/work" >
<name>Tom</name>
<email xmlns="http://example.com/main"
xmlns:home="http://example.com/home"
home:addr="HOME"
xmlns:work="http://example.com/work"
work:addr="WORK"></email>
</person>
<?xml version="1.0" encoding="utf-8"?>
High Performance Processing
XML Components Data Model
<?xml version="1.0"?>
<note xmlns:m="http://example.com/main">
<to>Tom</to>
<from>Bob</from>
<heading>Reminder</heading>
<m:body>Don't forget me this weekend!</m:body>
</note>
<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:m="http://example.com/main">
<xs:import namespace="http://example.com/main" schemaLocation="shared.xsd"/>
<xs:element name="note">
<xs:complexType>
<xs:sequence>
<xs:element name="to" type="xs:string"/>
<xs:element name="from" type="xs:string"/>
<xs:element name="heading" type="xs:string"/>
<xs:element ref="m:body" minOccurs="1"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>
<?xml version="1.0"?>
<xs:schema
xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="body" type="xs:string"/>
</xs:schema>
shared.xsd
XML Components Data Model
// Note maps the <note> document. To, From and Heading are matched by local
// name only; Body is additionally bound to the http://example.com/main
// namespace (the m: prefix in the sample document).
type Note struct {
XMLName xml.Name `xml:"note"`
To string `xml:"to"`
From string `xml:"from"`
Heading string `xml:"heading"`
Body string `xml:"http://example.com/main body"`
}
<?xml version="1.0"?>
<note xmlns:m="http://example.com/main">
<to>Tom</to>
<from>Bob</from>
<heading>Reminder</heading>
<m:body>Don't forget me this weekend!</m:body>
</note>
XML Components Data Model
XSD: XML Schema Definition Process
is a named component and has two additional
properties - name and target namespace
is an un-named component
cmd
parser
NSResolver
proto
generator
Language Code
Attribute Attribute Group ComplexType Element Enumeration FractionDigits
Pattern
SimpleType
Schema
List
Length
Import
Include
Group
Restriction TotalDigits Union WhiteSpace MaxLength MinLength MinExclusive
Attribute Attribute Group FieldName FieldType
ComplexType
Element
Group
SimpleType
Generator
SAX Parser
Schema
Notation Declaration
system identifier
public identifier
Element Declaration
scope
value constraint
nillable
substitution group affiliation
substitution group exclusions
disallowed substitutions
abstract
Simple Type Declaration
facets
final
variety
Attribute Declaration
scope
value constraint
Identity-constraint Declaration
identity-constraint category
selector
fields
referenced key
Complex Type Declaration
derivation method
final
abstract
prohibited substitutions
Model Group Definition
Attribute Group Definition
Attribute Use
required
value constraint
Wildcard
namespace constraint
process contents
Particle
min occurs
max occurs
Model Group
compositor
notation declarations attribute declarations
type definitions
element declarations
attribute group definitions
model group definitions
type
definitions
identity-constraint
definitions
content type
type
definition
type
definition
term
content type
base type
definition
base
type
definition
base
type
definition
attribute
uses
attribute
wildcard
term
term
particles
model
group
attribute
wildcard
type
definition
attribute definitions
attribute uses
https://github.com/xuri/xgen
SAX or DOM
SAX Parser DOM Parser
Simple API for XML Parsing Document Object Model
Event-based parser Stays in a tree structure
Low memory usage High memory usage
Best for the larger size of XML files Best for the smaller sizes of files
Read-only Insert or delete nodes
Backward navigation is not possible Backward and forward search is possible
Only a small part of the XML file is loaded in memory It loads the whole XML document in memory
OOXML Spreadsheets
OOXML ISO/IEC 29500 ECMA-376 Specification
SVG
SOAP
ODF
OOXML
C#
C++/CLI
MIME
XSLT
XHTML
DITA
0
200
400
600
800
1000
1200
0 1000 2000 3000 4000 5000 6000 7000
Technical
Committee
effort
(days)
Specification length (pages)
Specification Speed
OOXML Specification
WordprocessingML SpreadsheetML PresentationML
CustomML
Vocabularies
DrawingML Bibliography
Metadata Equations
VML (legacy)
Relationships Content Types Digital Signatures
Markup Languages
Open Packaging Convention
ZIP XML + Unicode
Core Technologies
OOXML Specification
WordprocessingML SpreadsheetML PresentationML
CustomML
Vocabularies
DrawingML Bibliography
Metadata Equations
VML (legacy)
Relationships Content Types Digital Signatures
Markup Languages
Open Packaging Convention
ZIP XML + Unicode
Core Technologies
Drawing 9918
Excel 6328
OPC 3147
Typical XML of the Cell Over 10589
Elements & Attributes
Shared Strings
Calc Chain
XML Maps
Styles
Theme
Sheets
Table
Chart Pivot Table
Pivot Cache
Pivot Records
Workbook
<c r="D3" s="7" t="s">
<v>0</v>
</c>
<xf numFmtId="0" fontId="4" fillId="2" borderId="2" xfId="1" applyBorder="1" />
<font>
<sz val="11"/>
<color theme="0"/>
<name val="Calibri"/>
<scheme val="minor"/>
</font>
<fill>
<patternFill patternType="solid">
<fgColor theme="4"/>
<bgColor theme="4"/>
</patternFill>
</fill>
<a:theme xmlns:a="http://schemas.openxmlformats.org
<a:themeElements>
<a:clrScheme name="Office">
<a:dk1>
<a:sysClr val="windowText" />
</a:dk1>
<a:lt1>
<a:sysClr val="window" />
</a:lt1>
<a:dk2>
<a:srgbClr val="1F497D"/>
</a:dk2>
<a:lt2>
<a:srgbClr val="FAF3E8"/>
</a:lt2>
<a:accent1>
<a:srgbClr val="5C93B4"/>
</a:accent1>
<cellStyleXfs count="12">
<xf numFmtId="0" fontId="0" fillId="0" borderId="0"/>
<xf numFmtId="0" fontId="4" fillId="2" borderId="0" applyBorder="1" applyAlignment="1" applyProtection="1"/>
</protection>
</xf>
<cellStyles count="2">
<cellStyle name="Accent1" xfId="1" builtinId="29"/>
B C D E
Q1 Q2
Revenue 412.52
Start
(built-in) <border>
<left style="thick">
<color auto="1"/>
</left>
<right style="thick">
<color auto="1"/>
</right>
<top style="thick">
<color auto="1"/>
</top>
<bottom style="thick">
<color auto="1"/>
</bottom>
<diagonal/>
</border>
Charset Encoding
<?xml version='1.0' encoding='character encoding' standalone='yes|no'?>
import (
"golang.org/x/net/html/charset"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
)
decoder = xml.NewDecoder(strings.NewReader(data))
decoder.CharsetReader = charset.NewReaderLabel
Ref: https://encoding.spec.whatwg.org
Name Labels
UTF-8 6
Legacy single-byte encodings 168
Legacy multi-byte Chinese
(simplified) encodings
10
Legacy multi-byte Chinese
(traditional) encodings
5
Legacy multi-byte Japanese
encodings
13
Legacy multi-byte Korean
encodings
10
Legacy miscellaneous encodings 16
228 Labels
// CharsetReader wraps input in a reader that decodes it from the named
// code page to UTF-8, choosing the code page from charmap.All.
//
// charset is compared case-insensitively with each code page's String()
// name after spaces, hyphens and underscores are removed from both sides.
// If no code page matches, input is returned unchanged rather than calling
// a method on a nil encoding.
//
// NOTE(review): the original compared two identifiers (sm, nm) that were
// never defined; the matching rule above is a reconstruction. Confirm it
// against the intended source.
func CharsetReader(charset string, input io.Reader) io.Reader {
	normalize := strings.NewReplacer(" ", "", "-", "", "_", "")
	want := normalize.Replace(charset)

	var enc encoding.Encoding
	for _, item := range charmap.All {
		// Entries without a String method cannot be matched by name.
		named, ok := item.(interface{ String() string })
		if !ok {
			continue
		}
		if strings.EqualFold(want, normalize.Replace(named.String())) {
			enc = item
			break
		}
	}
	if enc == nil {
		return input
	}
	return enc.NewDecoder().Reader(input)
}
Custom Charset Reader
Streaming I/O
Common Package Parts
Package
Relationships
Core Properties
Digital Signatures
Specific Format Parts
Office Document
Part
Relationships
XML Part
XML Part
Part
Rels
Etc…
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet
xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006">
<dimension ref="B2"/>
<sheetViews>
<sheetView tabSelected="1" workbookViewId="0" />
</sheetViews>
<sheetFormatPr baseColWidth="10" defaultRowHeight="16" />
<sheetData>
<row r="2">
<c r="B2">
<v>123</v>
</c>
</row>
</sheetData>
<pageMargins left="0.7" right="0.7" />
</worksheet>
A B C
1
2 123
3
4
Set Row
<sheetData>
<row r="2">
<c r="B2">
<v>123</v>
</c>
</row>
</sheetData>
// writeCell writes cell c to buf as a single <c> element: the opening tag
// with its attributes, an optional <f> child, an optional <v> child, and
// the closing tag. Write errors are discarded.
func writeCell(buf *bufferedWriter, c xlsxC) {
_, _ = buf.WriteString(`<c`)
// The xml:* attribute is emitted only when it carries a value.
if c.XMLSpace.Value != "" {
fmt.Fprintf(buf, ` xml:%s="%s"`,
c.XMLSpace.Name.Local, c.XMLSpace.Value)
}
// r is always written; s and t are skipped when they hold zero values.
// NOTE(review): attribute values are formatted with %s/%d and are not
// XML-escaped here, unlike the element text below.
fmt.Fprintf(buf, ` r="%s"`, c.R)
if c.S != 0 {
fmt.Fprintf(buf, ` s="%d"`, c.S)
}
if c.T != "" {
fmt.Fprintf(buf, ` t="%s"`, c.T)
}
_, _ = buf.WriteString(`>`)
// <f> is written when c.F is set; its content is XML-escaped.
if c.F != nil {
_, _ = buf.WriteString(`<f>`)
_ = xml.EscapeText(buf, []byte(c.F.Content))
_, _ = buf.WriteString(`</f>`)
}
// <v> is written when c.V is non-empty; its content is XML-escaped.
if c.V != "" {
_, _ = buf.WriteString(`<v>`)
_ = xml.EscapeText(buf, []byte(c.V))
_, _ = buf.WriteString(`</v>`)
}
_, _ = buf.WriteString(`</c>`)
}
type StreamWriter struct {
File *File
Sheet string
SheetID int
worksheet *xlsxWorksheet
rawData bufferedWriter
mergeCellsCount int
mergeCells string
tableParts string
}
Flush
// Flush finishes the worksheet XML held in sw.rawData: it closes
// <sheetData>, appends the remaining worksheet parts in a fixed order,
// closes <worksheet>, and flushes the underlying writer, returning the
// flush error if there is one.
//
// NOTE(review): the rest of the body is elided ("// ...") in this excerpt,
// so the success return is not shown.
func (sw *StreamWriter) Flush() error {
_, _ = sw.rawData.WriteString(`</sheetData>`)
// bulkAppendFields is called with index ranges 8-15, 17-38 and 40-40.
// NOTE(review): these presumably select worksheet fields by position;
// confirm against bulkAppendFields.
bulkAppendFields(&sw.rawData, sw.worksheet, 8, 15)
// The accumulated merge-cell XML is wrapped in a <mergeCells> element
// carrying the count only when at least one merge cell was recorded.
if sw.mergeCellsCount > 0 {
sw.mergeCells = fmt.Sprintf(`<mergeCells
count="%d">%s</mergeCells>`, sw.mergeCellsCount, sw.mergeCells)
}
_, _ = sw.rawData.WriteString(sw.mergeCells)
bulkAppendFields(&sw.rawData, sw.worksheet, 17, 38)
_, _ = sw.rawData.WriteString(sw.tableParts)
bulkAppendFields(&sw.rawData, sw.worksheet, 40, 40)
_, _ = sw.rawData.WriteString(`</worksheet>`)
if err := sw.rawData.Flush(); err != nil {
return err
}
// ...
}
type StreamWriter struct {
File *File
Sheet string
SheetID int
worksheet *xlsxWorksheet
rawData bufferedWriter
mergeCellsCount int
mergeCells string
tableParts string
}
Generate XML Part
Save Spreadsheet
// WriteToBuffer runs each part writer on f and then iterates over the
// streamed worksheets and the entries of f.XLSX, with a zip writer created
// over an in-memory buffer.
//
// NOTE(review): this excerpt is abridged. The loop bodies that would use
// zw, path and content, and the final return, are not shown.
func (f *File) WriteToBuffer() (*bytes.Buffer, error) {
buf := new(bytes.Buffer)
zw := zip.NewWriter(buf)
// Run every part writer before anything is stored.
f.calcChainWriter()
f.commentsWriter()
f.contentTypesWriter()
f.drawingsWriter()
f.vmlDrawingWriter()
f.workBookWriter()
f.workSheetWriter()
f.relsWriter()
f.sharedStringsWriter()
f.styleSheetWriter()
for path, stream := range f.streams {
// Save stream data
// NOTE(review): the result of Close is discarded here.
stream.rawData.Close()
}
for path, content := range f.XLSX {
// Save preserved data
}
}
XML Part to ZIP
Common Package Parts
Package
Relationships
Specific Format Parts
Office Document
Part
Relationships
Style / Theme / Calc Chain
Chart / PivotTable / Comments
Part
Rels
Etc…
Worksheets / SST
Workbook
Performance
102400 Row x 50 Columns, 6 Chars / Cell
0 5 10 15 20 25 30
Excelize 2.3.1@9316028 Streaming Write
go1.15.2 darwin/amd64
Excelize 2.3.1@9316028
go1.15.2 darwin/amd64
Time Cost (s)
Less is better
5.12 Million Cells
0 200 400 600 800 1000 1200 1400 1600 1800
Excelize 2.3.1@9316028 Streaming Write
go1.15.2 darwin/amd64
Excelize 2.3.1@9316028
go1.15.2 darwin/amd64
Memory Usage (MB)
Less is better
https://github.com/xuri/excelize
Processing XML and Spreadsheet in Go
Gopher China Conference
Beijing 2021 6/26 - 6/27
续日

More Related Content

What's hot

생산성을 높여주는 iOS 개발 방법들.pdf
생산성을 높여주는 iOS 개발 방법들.pdf생산성을 높여주는 iOS 개발 방법들.pdf
생산성을 높여주는 iOS 개발 방법들.pdfssuserb942d2
 
Avoiding callback hell in Node js using promises
Avoiding callback hell in Node js using promisesAvoiding callback hell in Node js using promises
Avoiding callback hell in Node js using promisesAnkit Agarwal
 
Javascript ES6 generators
Javascript ES6 generatorsJavascript ES6 generators
Javascript ES6 generatorsRamesh Nair
 
Document Object Model (DOM)
Document Object Model (DOM)Document Object Model (DOM)
Document Object Model (DOM)GOPAL BASAK
 
gRPC: The Story of Microservices at Square
gRPC: The Story of Microservices at SquaregRPC: The Story of Microservices at Square
gRPC: The Story of Microservices at SquareApigee | Google Cloud
 
Functional Programming In JS
Functional Programming In JSFunctional Programming In JS
Functional Programming In JSDamian Łabas
 
In-depth analysis of Kotlin Flows
In-depth analysis of Kotlin FlowsIn-depth analysis of Kotlin Flows
In-depth analysis of Kotlin FlowsGlobalLogic Ukraine
 
Can You Do That with APEX? Building Not So Straightforward Pages
Can You Do That with APEX? Building Not So Straightforward PagesCan You Do That with APEX? Building Not So Straightforward Pages
Can You Do That with APEX? Building Not So Straightforward PagesDimitri Gielis
 
Node.js File system & Streams
Node.js File system & StreamsNode.js File system & Streams
Node.js File system & StreamsEyal Vardi
 
React + Redux Introduction
React + Redux IntroductionReact + Redux Introduction
React + Redux IntroductionNikolaus Graf
 
Angular - Chapter 5 - Directives
 Angular - Chapter 5 - Directives Angular - Chapter 5 - Directives
Angular - Chapter 5 - DirectivesWebStackAcademy
 
002- JavaFX Tutorial - Getting Started
002- JavaFX Tutorial - Getting Started002- JavaFX Tutorial - Getting Started
002- JavaFX Tutorial - Getting StartedMohammad Hossein Rimaz
 
Scope rules : local and global variables
Scope rules : local and global variablesScope rules : local and global variables
Scope rules : local and global variablessangrampatil81
 
Chapter 8 c solution
Chapter 8 c solutionChapter 8 c solution
Chapter 8 c solutionAzhar Javed
 
Polymorphism in C# Function overloading in C#
Polymorphism in C# Function overloading in C#Polymorphism in C# Function overloading in C#
Polymorphism in C# Function overloading in C#Abid Kohistani
 

What's hot (20)

Html 5 geolocation api
Html 5 geolocation api Html 5 geolocation api
Html 5 geolocation api
 
생산성을 높여주는 iOS 개발 방법들.pdf
생산성을 높여주는 iOS 개발 방법들.pdf생산성을 높여주는 iOS 개발 방법들.pdf
생산성을 높여주는 iOS 개발 방법들.pdf
 
Avoiding callback hell in Node js using promises
Avoiding callback hell in Node js using promisesAvoiding callback hell in Node js using promises
Avoiding callback hell in Node js using promises
 
Promises, Promises
Promises, PromisesPromises, Promises
Promises, Promises
 
Javascript ES6 generators
Javascript ES6 generatorsJavascript ES6 generators
Javascript ES6 generators
 
Document Object Model (DOM)
Document Object Model (DOM)Document Object Model (DOM)
Document Object Model (DOM)
 
gRPC: The Story of Microservices at Square
gRPC: The Story of Microservices at SquaregRPC: The Story of Microservices at Square
gRPC: The Story of Microservices at Square
 
Javascript
JavascriptJavascript
Javascript
 
Java script
Java scriptJava script
Java script
 
Functional Programming In JS
Functional Programming In JSFunctional Programming In JS
Functional Programming In JS
 
In-depth analysis of Kotlin Flows
In-depth analysis of Kotlin FlowsIn-depth analysis of Kotlin Flows
In-depth analysis of Kotlin Flows
 
Can You Do That with APEX? Building Not So Straightforward Pages
Can You Do That with APEX? Building Not So Straightforward PagesCan You Do That with APEX? Building Not So Straightforward Pages
Can You Do That with APEX? Building Not So Straightforward Pages
 
Break and continue
Break and continueBreak and continue
Break and continue
 
Node.js File system & Streams
Node.js File system & StreamsNode.js File system & Streams
Node.js File system & Streams
 
React + Redux Introduction
React + Redux IntroductionReact + Redux Introduction
React + Redux Introduction
 
Angular - Chapter 5 - Directives
 Angular - Chapter 5 - Directives Angular - Chapter 5 - Directives
Angular - Chapter 5 - Directives
 
002- JavaFX Tutorial - Getting Started
002- JavaFX Tutorial - Getting Started002- JavaFX Tutorial - Getting Started
002- JavaFX Tutorial - Getting Started
 
Scope rules : local and global variables
Scope rules : local and global variablesScope rules : local and global variables
Scope rules : local and global variables
 
Chapter 8 c solution
Chapter 8 c solutionChapter 8 c solution
Chapter 8 c solution
 
Polymorphism in C# Function overloading in C#
Polymorphism in C# Function overloading in C#Polymorphism in C# Function overloading in C#
Polymorphism in C# Function overloading in C#
 

Similar to Processing XML and Spreadsheet data in Go (20)

Xml11
Xml11Xml11
Xml11
 
Xml Presentation-1
Xml Presentation-1Xml Presentation-1
Xml Presentation-1
 
Xml
XmlXml
Xml
 
Xml overview
Xml overviewXml overview
Xml overview
 
XML.ppt
XML.pptXML.ppt
XML.ppt
 
Xml
XmlXml
Xml
 
Xml
XmlXml
Xml
 
Internet and Web Technology (CLASS-7) [XML and AJAX] | NIC/NIELIT Web Technology
Internet and Web Technology (CLASS-7) [XML and AJAX] | NIC/NIELIT Web TechnologyInternet and Web Technology (CLASS-7) [XML and AJAX] | NIC/NIELIT Web Technology
Internet and Web Technology (CLASS-7) [XML and AJAX] | NIC/NIELIT Web Technology
 
Xml
XmlXml
Xml
 
2310 b 12
2310 b 122310 b 12
2310 b 12
 
XXE
XXEXXE
XXE
 
Xxe
XxeXxe
Xxe
 
Understanding XML DOM
Understanding XML DOMUnderstanding XML DOM
Understanding XML DOM
 
Xml
XmlXml
Xml
 
Xml intro1
Xml intro1Xml intro1
Xml intro1
 
Dtd
DtdDtd
Dtd
 
Creating Domain Specific Languages in Python
Creating Domain Specific Languages in PythonCreating Domain Specific Languages in Python
Creating Domain Specific Languages in Python
 
XML-Unit 1.ppt
XML-Unit 1.pptXML-Unit 1.ppt
XML-Unit 1.ppt
 
WEB PROGRAMMING
WEB PROGRAMMINGWEB PROGRAMMING
WEB PROGRAMMING
 
Introduction to xml
Introduction to xmlIntroduction to xml
Introduction to xml
 

Recently uploaded

Enjoy Night⚡Call Girls Dlf City Phase 3 Gurgaon >༒8448380779 Escort Service
Enjoy Night⚡Call Girls Dlf City Phase 3 Gurgaon >༒8448380779 Escort ServiceEnjoy Night⚡Call Girls Dlf City Phase 3 Gurgaon >༒8448380779 Escort Service
Enjoy Night⚡Call Girls Dlf City Phase 3 Gurgaon >༒8448380779 Escort ServiceDelhi Call girls
 
'Future Evolution of the Internet' delivered by Geoff Huston at Everything Op...
'Future Evolution of the Internet' delivered by Geoff Huston at Everything Op...'Future Evolution of the Internet' delivered by Geoff Huston at Everything Op...
'Future Evolution of the Internet' delivered by Geoff Huston at Everything Op...APNIC
 
Call Girls In Sukhdev Vihar Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Sukhdev Vihar Delhi 💯Call Us 🔝8264348440🔝Call Girls In Sukhdev Vihar Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Sukhdev Vihar Delhi 💯Call Us 🔝8264348440🔝soniya singh
 
Russian Call girls in Dubai +971563133746 Dubai Call girls
Russian  Call girls in Dubai +971563133746 Dubai  Call girlsRussian  Call girls in Dubai +971563133746 Dubai  Call girls
Russian Call girls in Dubai +971563133746 Dubai Call girlsstephieert
 
Call Girls In Pratap Nagar Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Pratap Nagar Delhi 💯Call Us 🔝8264348440🔝Call Girls In Pratap Nagar Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Pratap Nagar Delhi 💯Call Us 🔝8264348440🔝soniya singh
 
Hot Service (+9316020077 ) Goa Call Girls Real Photos and Genuine Service
Hot Service (+9316020077 ) Goa  Call Girls Real Photos and Genuine ServiceHot Service (+9316020077 ) Goa  Call Girls Real Photos and Genuine Service
Hot Service (+9316020077 ) Goa Call Girls Real Photos and Genuine Servicesexy call girls service in goa
 
Low Rate Young Call Girls in Sector 63 Mamura Noida ✔️☆9289244007✔️☆ Female E...
Low Rate Young Call Girls in Sector 63 Mamura Noida ✔️☆9289244007✔️☆ Female E...Low Rate Young Call Girls in Sector 63 Mamura Noida ✔️☆9289244007✔️☆ Female E...
Low Rate Young Call Girls in Sector 63 Mamura Noida ✔️☆9289244007✔️☆ Female E...SofiyaSharma5
 
Chennai Call Girls Porur Phone 🍆 8250192130 👅 celebrity escorts service
Chennai Call Girls Porur Phone 🍆 8250192130 👅 celebrity escorts serviceChennai Call Girls Porur Phone 🍆 8250192130 👅 celebrity escorts service
Chennai Call Girls Porur Phone 🍆 8250192130 👅 celebrity escorts servicesonalikaur4
 
AlbaniaDreamin24 - How to easily use an API with Flows
AlbaniaDreamin24 - How to easily use an API with FlowsAlbaniaDreamin24 - How to easily use an API with Flows
AlbaniaDreamin24 - How to easily use an API with FlowsThierry TROUIN ☁
 
Chennai Call Girls Alwarpet Phone 🍆 8250192130 👅 celebrity escorts service
Chennai Call Girls Alwarpet Phone 🍆 8250192130 👅 celebrity escorts serviceChennai Call Girls Alwarpet Phone 🍆 8250192130 👅 celebrity escorts service
Chennai Call Girls Alwarpet Phone 🍆 8250192130 👅 celebrity escorts servicevipmodelshub1
 
VIP Kolkata Call Girl Dum Dum 👉 8250192130 Available With Room
VIP Kolkata Call Girl Dum Dum 👉 8250192130  Available With RoomVIP Kolkata Call Girl Dum Dum 👉 8250192130  Available With Room
VIP Kolkata Call Girl Dum Dum 👉 8250192130 Available With Roomdivyansh0kumar0
 
Call Girls In Defence Colony Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Defence Colony Delhi 💯Call Us 🔝8264348440🔝Call Girls In Defence Colony Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Defence Colony Delhi 💯Call Us 🔝8264348440🔝soniya singh
 
GDG Cloud Southlake 32: Kyle Hettinger: Demystifying the Dark Web
GDG Cloud Southlake 32: Kyle Hettinger: Demystifying the Dark WebGDG Cloud Southlake 32: Kyle Hettinger: Demystifying the Dark Web
GDG Cloud Southlake 32: Kyle Hettinger: Demystifying the Dark WebJames Anderson
 
₹5.5k {Cash Payment}New Friends Colony Call Girls In [Delhi NIHARIKA] 🔝|97111...
₹5.5k {Cash Payment}New Friends Colony Call Girls In [Delhi NIHARIKA] 🔝|97111...₹5.5k {Cash Payment}New Friends Colony Call Girls In [Delhi NIHARIKA] 🔝|97111...
₹5.5k {Cash Payment}New Friends Colony Call Girls In [Delhi NIHARIKA] 🔝|97111...Diya Sharma
 
Best VIP Call Girls Noida Sector 75 Call Me: 8448380779
Best VIP Call Girls Noida Sector 75 Call Me: 8448380779Best VIP Call Girls Noida Sector 75 Call Me: 8448380779
Best VIP Call Girls Noida Sector 75 Call Me: 8448380779Delhi Call girls
 
FULL ENJOY Call Girls In Mayur Vihar Delhi Contact Us 8377087607
FULL ENJOY Call Girls In Mayur Vihar Delhi Contact Us 8377087607FULL ENJOY Call Girls In Mayur Vihar Delhi Contact Us 8377087607
FULL ENJOY Call Girls In Mayur Vihar Delhi Contact Us 8377087607dollysharma2066
 
VIP Call Girls Kolkata Ananya 🤌 8250192130 🚀 Vip Call Girls Kolkata
VIP Call Girls Kolkata Ananya 🤌  8250192130 🚀 Vip Call Girls KolkataVIP Call Girls Kolkata Ananya 🤌  8250192130 🚀 Vip Call Girls Kolkata
VIP Call Girls Kolkata Ananya 🤌 8250192130 🚀 Vip Call Girls Kolkataanamikaraghav4
 
Call Girls In Saket Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Saket Delhi 💯Call Us 🔝8264348440🔝Call Girls In Saket Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Saket Delhi 💯Call Us 🔝8264348440🔝soniya singh
 

Recently uploaded (20)

Enjoy Night⚡Call Girls Dlf City Phase 3 Gurgaon >༒8448380779 Escort Service
Enjoy Night⚡Call Girls Dlf City Phase 3 Gurgaon >༒8448380779 Escort ServiceEnjoy Night⚡Call Girls Dlf City Phase 3 Gurgaon >༒8448380779 Escort Service
Enjoy Night⚡Call Girls Dlf City Phase 3 Gurgaon >༒8448380779 Escort Service
 
'Future Evolution of the Internet' delivered by Geoff Huston at Everything Op...
'Future Evolution of the Internet' delivered by Geoff Huston at Everything Op...'Future Evolution of the Internet' delivered by Geoff Huston at Everything Op...
'Future Evolution of the Internet' delivered by Geoff Huston at Everything Op...
 
Call Girls In Sukhdev Vihar Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Sukhdev Vihar Delhi 💯Call Us 🔝8264348440🔝Call Girls In Sukhdev Vihar Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Sukhdev Vihar Delhi 💯Call Us 🔝8264348440🔝
 
Russian Call girls in Dubai +971563133746 Dubai Call girls
Russian  Call girls in Dubai +971563133746 Dubai  Call girlsRussian  Call girls in Dubai +971563133746 Dubai  Call girls
Russian Call girls in Dubai +971563133746 Dubai Call girls
 
Call Girls In Pratap Nagar Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Pratap Nagar Delhi 💯Call Us 🔝8264348440🔝Call Girls In Pratap Nagar Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Pratap Nagar Delhi 💯Call Us 🔝8264348440🔝
 
Hot Service (+9316020077 ) Goa Call Girls Real Photos and Genuine Service
Hot Service (+9316020077 ) Goa  Call Girls Real Photos and Genuine ServiceHot Service (+9316020077 ) Goa  Call Girls Real Photos and Genuine Service
Hot Service (+9316020077 ) Goa Call Girls Real Photos and Genuine Service
 
Low Rate Young Call Girls in Sector 63 Mamura Noida ✔️☆9289244007✔️☆ Female E...
Low Rate Young Call Girls in Sector 63 Mamura Noida ✔️☆9289244007✔️☆ Female E...Low Rate Young Call Girls in Sector 63 Mamura Noida ✔️☆9289244007✔️☆ Female E...
Low Rate Young Call Girls in Sector 63 Mamura Noida ✔️☆9289244007✔️☆ Female E...
 
Chennai Call Girls Porur Phone 🍆 8250192130 👅 celebrity escorts service
Chennai Call Girls Porur Phone 🍆 8250192130 👅 celebrity escorts serviceChennai Call Girls Porur Phone 🍆 8250192130 👅 celebrity escorts service
Chennai Call Girls Porur Phone 🍆 8250192130 👅 celebrity escorts service
 
AlbaniaDreamin24 - How to easily use an API with Flows
AlbaniaDreamin24 - How to easily use an API with FlowsAlbaniaDreamin24 - How to easily use an API with Flows
AlbaniaDreamin24 - How to easily use an API with Flows
 
Chennai Call Girls Alwarpet Phone 🍆 8250192130 👅 celebrity escorts service
Chennai Call Girls Alwarpet Phone 🍆 8250192130 👅 celebrity escorts serviceChennai Call Girls Alwarpet Phone 🍆 8250192130 👅 celebrity escorts service
Chennai Call Girls Alwarpet Phone 🍆 8250192130 👅 celebrity escorts service
 
VIP Kolkata Call Girl Dum Dum 👉 8250192130 Available With Room
VIP Kolkata Call Girl Dum Dum 👉 8250192130  Available With RoomVIP Kolkata Call Girl Dum Dum 👉 8250192130  Available With Room
VIP Kolkata Call Girl Dum Dum 👉 8250192130 Available With Room
 
Call Girls In Defence Colony Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Defence Colony Delhi 💯Call Us 🔝8264348440🔝Call Girls In Defence Colony Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Defence Colony Delhi 💯Call Us 🔝8264348440🔝
 
GDG Cloud Southlake 32: Kyle Hettinger: Demystifying the Dark Web
GDG Cloud Southlake 32: Kyle Hettinger: Demystifying the Dark WebGDG Cloud Southlake 32: Kyle Hettinger: Demystifying the Dark Web
GDG Cloud Southlake 32: Kyle Hettinger: Demystifying the Dark Web
 
₹5.5k {Cash Payment}New Friends Colony Call Girls In [Delhi NIHARIKA] 🔝|97111...
₹5.5k {Cash Payment}New Friends Colony Call Girls In [Delhi NIHARIKA] 🔝|97111...₹5.5k {Cash Payment}New Friends Colony Call Girls In [Delhi NIHARIKA] 🔝|97111...
₹5.5k {Cash Payment}New Friends Colony Call Girls In [Delhi NIHARIKA] 🔝|97111...
 
Best VIP Call Girls Noida Sector 75 Call Me: 8448380779
Best VIP Call Girls Noida Sector 75 Call Me: 8448380779Best VIP Call Girls Noida Sector 75 Call Me: 8448380779
Best VIP Call Girls Noida Sector 75 Call Me: 8448380779
 
Call Girls In South Ex 📱 9999965857 🤩 Delhi 🫦 HOT AND SEXY VVIP 🍎 SERVICE
Call Girls In South Ex 📱  9999965857  🤩 Delhi 🫦 HOT AND SEXY VVIP 🍎 SERVICECall Girls In South Ex 📱  9999965857  🤩 Delhi 🫦 HOT AND SEXY VVIP 🍎 SERVICE
Call Girls In South Ex 📱 9999965857 🤩 Delhi 🫦 HOT AND SEXY VVIP 🍎 SERVICE
 
FULL ENJOY Call Girls In Mayur Vihar Delhi Contact Us 8377087607
FULL ENJOY Call Girls In Mayur Vihar Delhi Contact Us 8377087607FULL ENJOY Call Girls In Mayur Vihar Delhi Contact Us 8377087607
FULL ENJOY Call Girls In Mayur Vihar Delhi Contact Us 8377087607
 
Rohini Sector 26 Call Girls Delhi 9999965857 @Sabina Saikh No Advance
Rohini Sector 26 Call Girls Delhi 9999965857 @Sabina Saikh No AdvanceRohini Sector 26 Call Girls Delhi 9999965857 @Sabina Saikh No Advance
Rohini Sector 26 Call Girls Delhi 9999965857 @Sabina Saikh No Advance
 
VIP Call Girls Kolkata Ananya 🤌 8250192130 🚀 Vip Call Girls Kolkata
VIP Call Girls Kolkata Ananya 🤌  8250192130 🚀 Vip Call Girls KolkataVIP Call Girls Kolkata Ananya 🤌  8250192130 🚀 Vip Call Girls Kolkata
VIP Call Girls Kolkata Ananya 🤌 8250192130 🚀 Vip Call Girls Kolkata
 
Call Girls In Saket Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Saket Delhi 💯Call Us 🔝8264348440🔝Call Girls In Saket Delhi 💯Call Us 🔝8264348440🔝
Call Girls In Saket Delhi 💯Call Us 🔝8264348440🔝
 

Processing XML and Spreadsheet data in Go

  • 1. Processing XML and Spreadsheet in Go 续 日 Gopher China Conference Beijing 2021 6/26 - 6/27
  • 2. Self Introduction The author of the Excelize - Go language spreadsheet library. Familiar with Go language programming, middleware, and big data solution. Working Experiences Alibaba Group - Software Engineer Baidu Inc. - Software Engineer Qihoo 360 – Server-side Software Engineer GitHub: @xuri Twitter: @xurime Blog: https://xuri.me
  • 3. Agenda Serialize and Deserialize 01 • Document Object Model • Event-driven (Simple API for XML) • Serialize and Deserialize Control Handle Complex XML 02 • Partial Load • Namespace & Entity • Ser/Deserialize Idempotence High Performance Processing 03 • XML Schema Definition • DOM or SAX OOXML Spreadsheets 04 • Excel XML Specification • Charset Encoding • Streaming I/O
  • 5. Document Object Model <?xml version="1.0" encoding="utf-8"?> <Person> <Name>Tom</Name> <Email where="home"> <Addr>tom@example.com</Addr> </Email> </Person> type Person struct { Name string Email struct { Where string `xml:"where,attr"` Addr string } } encoding/xml var p Person if err := xml.Unmarshal([]byte(data), &p); err != nil { fmt.Println(err) } fmt.Printf("%+v\n", p) // {Name:Tom Email:{Where:home Addr:tom@example.com}}
  • 6. XML Finite State Machine 0 start start tag NAME TEXT equal end tag value value end value COMMENT version blank/enter letter digit < ? ?> --> !-- = " " ' ' > blank / letter > blank
  • 7. Unmarshal Unmarshal NewDecoder Decoder.unmarshal Decoder.switchToReader unmarshalPath unmarshalAttr unmarshalInterface unmarshalTextInterface Decoder.RawToken Decoder.pushElement Decoder.pushNs encoding/xml marshal.go typeinfo.go xml.go read.go example & test
  • 8. Go XML Parser type Decoder struct { Strict bool AutoClose []string Entity map[string]string CharsetReader func(charset string, input io.Reader) (io.Reader, error) DefaultSpace string r io.ByteReader t TokenReader buf bytes.Buffer saved *bytes.Buffer stk *stack free *stack needClose bool toClose Name nextToken Token nextByte int ns map[string]string err error line int offset int64 unmarshalDepth int } encoding/xml:xml.go StartElement EndElement CharData Comment ProcInst Directive
  • 9. Event-driven (Simple API for XML) decoder := xml.NewDecoder(strings.NewReader(data)) for { token, _ := decoder.Token() if token == nil { break } switch element := token.(type) { case xml.StartElement: fmt.Printf("%+v\n", element) case xml.EndElement: fmt.Printf("%+v\n", element) } } <?xml version="1.0" encoding="utf-8"?> <Person> <Name>Tom</Name> <Email where="home"> <Addr>tom@example.com</Addr> </Email> </Person> {Name:{Space: Local:Person} Attr:[]} {Name:{Space: Local:Name} Attr:[]} {Name:{Space: Local:Name}} {Name:{Space: Local:Email} Attr:[{Name:{Space: Local:where} Value:home}]} {Name:{Space: Local:Addr} Attr:[]} {Name:{Space: Local:Addr}} {Name:{Space: Local:Email}} {Name:{Space: Local:Person}}
  • 10. Serialize and Deserialize Control switch flag { case "attr": finfo.flags |= fAttr case "cdata": finfo.flags |= fCDATA case "chardata": finfo.flags |= fCharData case "innerxml": finfo.flags |= fInnerXML case "comment": finfo.flags |= fComment case "any": finfo.flags |= fAny case "omitempty": finfo.flags |= fOmitEmpty } encoding/xml:typeinfo.go type Person struct { Name string Email struct { Where string `xml:"where,attr,omitempty"` Addr string } } attribute with the field name in the XML element written as character data, not as an XML element written as character data wrapped in one or more <![CDATA[ ... ]]> tags written verbatim, not subject to the usual marshaling procedure unmatched rule, maps the sub-element to that struct field omitted if the field value is empty
  • 11. Partial Load <?xml version="1.0" encoding="utf-8"?> <Person> <Name>Tom</Name> <Email> <Addr>tom@example.com</Addr> </Email> </Person> type Person struct { Name string Email partialXML } type partialXML struct { Content string `xml:",innerxml"` } var p Person err := xml.Unmarshal([]byte(data), &p) if err != nil { fmt.Println(err) } fmt.Printf("%+v\n", p) {Name:Tom Email:{Content: <Addr>tom@example.com</Addr> }}
  • 13. Datatypes Go Datatypes XML Datatypes string anyType, ENTITY,ID, IDREF, NCName, NMTOKEN, Name, anyURI, duration, language, normalizedString, string, token, xml:lang, xml:space, xml:base,xml:id []string ENTITIES, IDREFS, NMTOKENS, NOTATION xml.Name QName []byte base64Binary, hexBinary, unsignedByte bool boolean byte byte float64 decimal, double, float, int64 int, integer, long, negativeInteger, nonNegativeInteger, nonPositiveInteger, positiveInteger, short uint64 unsignedInt, unsignedLong, unsignedShort time.Time date, dateTime, gDay, gMonth, gMonthDay, gYear, gYearMonth,time anyType anySimpleType all complex types gYearMonth gYear gMonthDay gDay gMonth date time dateTime duration boolean base64Binary hexBinary float double anyURI QName NOTATION decimal string normalizedString integer token long nonNegativeInteger nonPositiveInteger language Name NMTOKEN negativeInteger int unsignedLong positiveInteger NCName NMTOKENS short unsignedInt ID IDREF ENTITY ENTITIES IDREFS byte unsignedShort unsignedByte ur-types built-in primitive types built-in derived types complex types derived by restriction Derived by list Derived by extension or restriction
  • 14. Entity XML Entity exp := `<!ENTITY\s+([^\s]+)\s+"([^"]+)">` entities := map[string]string{} d := xml.NewDecoder(strings.NewReader(input)) var rEntity = regexp.MustCompile(exp) for { tok, err := d.Token() if err != nil { break } dir, ok := tok.(xml.Directive) if !ok { continue } fmt.Println(string(dir)) for _, m := range rEntity.FindAllSubmatch(dir,-1) { entities[string(m[1])] = string(m[2]) } } fmt.Println("entities", entities) type Person struct { XMLName xml.Name `xml:"person"` Name string `xml:"name"` Address string `xml:"address"` } entities map[name:Tom email:tom@example.com] Get Entity <?xml version="1.0" encoding="utf-8"?> <!DOCTYPE person[ <!ENTITY name "Tom"> <!ENTITY email "tom@example.com"> ]> <person> <name>&name;</name> <address>&email;</address> </person>
  • 15. Entity XML Entity d = xml.NewDecoder(strings.NewReader(input)) d.Strict = false d.Entity = entities err := d.Decode(&v) if err != nil { fmt.Printf("error: %v", err) return } fmt.Printf("%+v\n", v) {XMLName:{Space: Local:company} Name:Jack Address:Tom} Decode with Entity <?xml version="1.0" encoding="utf-8"?> <!DOCTYPE person[ <!ENTITY name "Tom"> <!ENTITY email "tom@example.com"> ]> <person> <name>&name;</name> <address>&email;</address> </person> type Person struct { XMLName xml.Name `xml:"person"` Name string `xml:"name"` Address string `xml:"address"` }
  • 16. Namespace & Ser/Deserialize Idempotence <?xml version="1.0" encoding="utf-8"?> <person xmlns="http://example.com/default" xmlns:m="http://example.com/main" xmlns:h="http://example.com/home" xmlns:w="http://example.com/work"> <name>Tom</name> <m:email h:addr="HOME" w:addr="WORK" /> </person> type Person struct { XMLName xml.Name `xml:"http://example.com/default person"` Name string `xml:"name"` Email struct { XMLName xml.Name `xml:"http://example.com/main email"` HomeAddr string `xml:"http://example.com/home addr,attr"` WorkAddr string `xml:"http://example.com/work addr,attr"` } // TAG NOT HERE: `xml:"email"` } <person xmlns="http://example.com/default"> <name>Tom</name> <email xmlns="http://example.com/main" xmlns:home="http://example.com/home" home:addr="HOME" xmlns:work="http://example.com/work" work:addr="WORK"></email> </person> Inline Namespace Declare Root Element NS Missing! Namespace Local Name
  • 17. Ser/Deserialize Idempotence encoding/xml:xml.go type Token interface{} type EndElement struct { Name Name } type Name struct { Space, Local string } type Attr struct { Name Name Value string } type StartElement struct { Name Name Attr []Attr } // getRootEleAttr extract root element attributes by // given XML decoder. func getRootEleAttr(d *xml.Decoder) []xml.Attr { tokenIdx := 0 for { token, _ := d.Token() if token == nil { break } switch startElement := token.(type) { case xml.StartElement: tokenIdx++ if tokenIdx == 1 { return startElement.Attr } } } return nil }
  • 18. Ser/Deserialize Idempotence <?xml version="1.0" encoding="utf-8"?> <person xmlns="http://example.com/default" xmlns:m="http://example.com/main" xmlns:h="http://example.com/home" xmlns:w="http://example.com/work"> <name>Tom</name> <m:email h:addr="HOME" w:addr="WORK" /> </person> decoder := xml.NewDecoder(strings.NewReader(data)) marshalXML := "" for { token, _ := decoder.Token() if token == nil { break } switch element := token.(type) { case xml.StartElement: for _, attr := range element.Attr { if element.Name.Local == "person" { colon := "" if attr.Name.Space != "" { colon = ":" } marshalXML += fmt.Sprintf("%s%s%s=\"%s\" ", attr.Name.Space, colon, attr.Name.Local, attr.Value) } } } } fmt.Printf("<person %s>\n", marshalXML) <person xmlns="http://example.com/default" xmlns:m="http://example.com/main" xmlns:h="http://example.com/home" xmlns:w="http://example.com/work" > <name>Tom</name> <email xmlns="http://example.com/main" xmlns:home="http://example.com/home" home:addr="HOME" xmlns:work="http://example.com/work" work:addr="WORK"></email> </person> <?xml version="1.0" encoding="utf-8"?>
  • 20. XML Components Data Model <?xml version="1.0"?> <note xmlns:m="http://example.com/main"> <to>Tom</to> <from>Bob</from> <heading>Reminder</heading> <m:body>Don't forget me this weekend!</m:body> </note> <?xml version="1.0"?> <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:m="http://example.com/main"> <xsd:import namespace="http://example.com/main" schemaLocation="shared.xsd"/> <xs:element name="note"> <xs:complexType> <xs:sequence> <xs:element name="to" type="xs:string"/> <xs:element name="from" type="xs:string"/> <xs:element name="heading" type="xs:string"/> <xs:element name="m:body" use="required"/> </xs:sequence> </xs:complexType> </xs:element> </xs:schema> <?xml version="1.0"?> <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="body" type="xs:string"/> </xs:schema> shared.xsd
  • 21. XML Components Data Model type Note struct { XMLName xml.Name `xml:"note"` To string `xml:"to"` From string `xml:"from"` Heading string `xml:"heading"` Body string `xml:"http://example.com/main body"` } <?xml version="1.0"?> <note xmlns:m="http://example.com/main"> <to>Tom</to> <from>Bob</from> <heading>Reminder</heading> <m:body>Don't forget me this weekend!</m:body> </note>
  • 22. XML Components Data Model XSD: XML Schema Definition Process is a named component and has two additional properties - name and target namespace is an un-named component cmd parser NSResolver proto generator Language Code Attribute Attribute Group ComplexType Element Enumeration FractionDigits Pattern SimpleType Schema List Length Import Include Group Restriction TotalDigits Union WhiteSpace MaxLength MinLength MinExclusive Attribute Attribute Group FieldName FieldType ComplexType Element Group SimpleType Generator SAX Parser Schema Notation Declaration system identifier public identifier Element Declaration scope value constraint nillable substitution group affiliation substitution group exclusions disallowed substitutions abstract Simple Type Declaration facets final variety Attribute Declaration scope value constraint Identity-constraint Declaration identity-constraint category selector fields referenced key Complex Type Declaration derivation method final abstract prohibited substitutions Model Group Definition Attribute Group Definition Attribute Use required value constraint Wildcard namespace constraint process contents Particle min occurs max occurs Model Group compositor notation declarations attribute declarations type definitions element declarations attribute group definitions model group definitions type definitions identity-constraint definitions content type type definition type definition term content type base type definition base type definition base type definition attribute uses attribute wildcard term term particles model group attribute wildcard type definition attribute definitions attribute uses https://github.com/xuri/xgen
  • 23. SAX or DOM SAX Parser DOM Parser Simple API for XML Parsing Document Object Model Event-based parser Stays in a tree structure Low memory usage High memory usage Best for the larger size of XML files Best for the smaller sizes of files Read-only Insert or delete nodes Backward navigation is not possible Backward and forward search is possible A small part of the XML file is only loaded in memory It loads whole XML documents in memory
  • 25. OOXML ISO/IEC 29500 ECMA-376 Specification SVG SOAP ODF OOXML C# C++/CLI MIME XSLT XHTML DITA 0 200 400 600 800 1000 1200 0 1000 2000 3000 4000 5000 6000 7000 Technical Committee effort (days) Specification length (pages) Specification Speed
  • 26. OOXML Specification WordprocessingML SpreadsheetML PresentationML CustomML Vocabularies DrawingML Bibliography Metadata Equations VML (legacy) Relationships Content Types Digital Signatures Markup Languages Open Packaging Convention ZIP XML + Unicode Core Technologies
  • 27. OOXML Specification WordprocessingML SpreadsheetML PresentationML CustomML Vocabularies DrawingML Bibliography Metadata Equations VML (legacy) Relationships Content Types Digital Signatures Markup Languages Open Packaging Convention ZIP XML + Unicode Core Technologies Drawing 9918 Excel 6328 OPC 3147
  • 28. Typical XML of the Cell Over 10589 Elements & Attributes Shared Strings Calc Chain XML Maps Styles Theme Sheets Table Chart Pivot Table Pivot Cache Pivot Records Workbook <c r="D3" s="7" t="s"> <v>0</v> </c> <xf numFmtId="0" fontId="4" fillId="2" borderId="2" xfId="1" applyBorder="1" /> <font> <sz val="11"/> <color theme="0"/> <name val="Calibri"/> <scheme val="minor"/> </font> <fill> <patternFill patternType="solid"> <fgColor theme="4"/> <bgColor theme=”4"/> </patternFill> </fill> <a:theme xmlns:a="http://schemas.openxmlformats.org <a:themeElements> <a:clrScheme name="Office"> <a:dk1> <a:sysClr val="windowText" /> </a:dk1> <a:lt1> <a:sysClr val="window" /> </a:lt1> <a:dk2> <a:srgbClr val="1F497D"/> </a:dk2> <a:lt2> <a:srgbClr val="FAF3E8"/> </a:lt2> <a:accent1> <a:srgbClr val="5C93B4"/> </a:accent1> <cellStyleXfs count="12"> <xf numFmtId="0" fontId="0" fillId="0" borderId="0"/> <xf numFmtId="0" fontId="4" fillId="2" borderId="0" applyBorder="1" applyAlignment="1" applyProtection="1"/> </protection> </xf> <cellStyles count="2"> <cellStyle name="Accent1" xfId="1" builtinId="29"/> B C D E Q1 Q2 Revenue 412.52 Start (build-in) <border> <left style="thick"> <color auto="1"/> </left> <right style="thick"> <color auto="1"/> </right> <top style="thick"> <color auto="1"/> </top> <bottom style="thick"> <color auto="1"/> </bottom> <diagonal/> </border>
  • 29. Charset Encoding <?xml version='1.0' encoding='character encoding' standalone='yes|no'?> import ( "golang.org/x/net/html/charset" "golang.org/x/text/encoding" "golang.org/x/text/encoding/charmap" ) decoder = xml.NewDecoder(strings.NewReader(data)) decoder.CharsetReader = charset.NewReaderLabel Ref: https://encoding.spec.whatwg.org Name Labels UTF-8 6 Legacy single-byte encodings 168 Legacy multi-byte Chinese (simplified) encodings 10 Legacy multi-byte Chinese (traditional) encodings 5 Legacy multi-byte Japanese encodings 13 Legacy multi-byte Korean encodings 10 Legacy miscellaneous encodings 16 228 Labels // CharsetReader Decoder from all codepages to UTF-8 func CharsetReader(charset string, input io.Reader) io.Reader { var enc encoding.Encoding for i := range charmap.All { item = charmap.All[i] if strings.EqualFold(sm, nm) { enc = item } } return enc.NewDecoder().Reader(input) } Custom Charset Reader
  • 30. Streaming I/O Common Package Parts Package Relationships Core Properties Digital Signatures Specific Format Parts Office Document Part Relationships XML Part XML Part Part Rels Etc… <?xml version="1.0" encoding="UTF-8" standalone="yes"?> <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"> <dimension ref="B2"/> <sheetViews> <sheetView tabSelected="1" workbookViewId="0" /> </sheetViews> <sheetFormatPr baseColWidth="10" defaultRowHeight="16" /> <sheetData> <row r="2"> <c r="B2"> <v>123</v> </c> </row> </sheetData> <pageMargins left="0.7" right="0.7" /> </worksheet> A B C 1 2 123 3 4
  • 31. Set Row <sheetData> <row r="2"> <c r="B2"> <v>123</v> </c> </row> </sheetData> func writeCell(buf *bufferedWriter, c xlsxC) { _, _ = buf.WriteString(`<c`) if c.XMLSpace.Value != "" { fmt.Fprintf(buf, ` xml:%s="%s"`, c.XMLSpace.Name.Local, c.XMLSpace.Value) } fmt.Fprintf(buf, ` r="%s"`, c.R) if c.S != 0 { fmt.Fprintf(buf, ` s="%d"`, c.S) } if c.T != "" { fmt.Fprintf(buf, ` t="%s"`, c.T) } _, _ = buf.WriteString(`>`) if c.F != nil { _, _ = buf.WriteString(`<f>`) _ = xml.EscapeText(buf, []byte(c.F.Content)) _, _ = buf.WriteString(`</f>`) } if c.V != "" { _, _ = buf.WriteString(`<v>`) _ = xml.EscapeText(buf, []byte(c.V)) _, _ = buf.WriteString(`</v>`) } _, _ = buf.WriteString(`</c>`) } type StreamWriter struct { File *File Sheet string SheetID int worksheet *xlsxWorksheet rawData bufferedWriter mergeCellsCount int mergeCells string tableParts string }
  • 32. Flush func (sw *StreamWriter) Flush() error { _, _ = sw.rawData.WriteString(`</sheetData>`) bulkAppendFields(&sw.rawData, sw.worksheet, 8, 15) if sw.mergeCellsCount > 0 { sw.mergeCells = fmt.Sprintf(`<mergeCells count="%d">%s</mergeCells>`, sw.mergeCellsCount, sw.mergeCells) } _, _ = sw.rawData.WriteString(sw.mergeCells) bulkAppendFields(&sw.rawData, sw.worksheet, 17, 38) _, _ = sw.rawData.WriteString(sw.tableParts) bulkAppendFields(&sw.rawData, sw.worksheet, 40, 40) _, _ = sw.rawData.WriteString(`</worksheet>`) if err := sw.rawData.Flush(); err != nil { return err } // ... } type StreamWriter struct { File *File Sheet string SheetID int worksheet *xlsxWorksheet rawData bufferedWriter mergeCellsCount int mergeCells string tableParts string } Generate XML Part
  • 33. Save Spreadsheet func (f *File) WriteToBuffer() (*bytes.Buffer, error) { buf := new(bytes.Buffer) zw := zip.NewWriter(buf) f.calcChainWriter() f.commentsWriter() f.contentTypesWriter() f.drawingsWriter() f.vmlDrawingWriter() f.workBookWriter() f.workSheetWriter() f.relsWriter() f.sharedStringsWriter() f.styleSheetWriter() for path, stream := range f.streams { // Save stream data stream.rawData.Close() } for path, content := range f.XLSX { // Save preserve data } } XML Part to ZIP Common Package Parts Package Relationships Specific Format Parts Office Document Part Relationships Style / Theme / Calc Chain Chart / PivotTable / Comments Part Rels Etc… Worksheets / SST Workbook
  • 34. Performance 102400 Row x 50 Columns, 6 Chars / Cell 0 5 10 15 20 25 30 Excelize 2.3.1@9316028 Streaming Write go1.15.2 darwin/amd64 Excelize 2.3.1@9316028 go1.15.2 darwin/amd64 Time Cost (s) Less is better 5.12 Million Cells 0 200 400 600 800 1000 1200 1400 1600 1800 Excelize 2.3.1@9316028 Streaming Write go1.15.2 darwin/amd64 Excelize 2.3.1@9316028 go1.15.2 darwin/amd64 Memory Usage (MB) Less is better https://github.com/xuri/excelize
  • 35. Processing XML and Spreadsheet in Go Gopher China Conference Beijing 2021 6/26 - 6/27 续日