metadata

package
v0.0.0-...-227fc60 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 15, 2025 License: MIT Imports: 8 Imported by: 0

Documentation

Overview

Package metadata provides comprehensive metadata extraction from .doc files.

This package implements full SummaryInformation and DocumentSummaryInformation parsing according to the OLE Property Set specification, supporting all standard document properties and custom properties.

Index

Constants

View Source
// Property identifiers for the SummaryInformation stream. The IDs are
// contiguous, running from 0x02 (PIDTitle) through 0x13 (PIDSecurity).
const (
	// Descriptive properties.
	PIDTitle      = 0x02
	PIDSubject    = 0x03
	PIDAuthor     = 0x04
	PIDKeywords   = 0x05
	PIDComments   = 0x06
	PIDTemplate   = 0x07
	PIDLastAuthor = 0x08
	PIDRevNumber  = 0x09

	// Editing, printing, creation and save times.
	PIDEditTime     = 0x0A
	PIDLastPrinted  = 0x0B
	PIDCreateTime   = 0x0C
	PIDLastSaveTime = 0x0D

	// Document size counters.
	PIDPageCount = 0x0E
	PIDWordCount = 0x0F
	PIDCharCount = 0x10

	// Thumbnail, application name and security.
	PIDThumbnail = 0x11
	PIDAppName   = 0x12
	PIDSecurity  = 0x13
)

Property IDs for the SummaryInformation stream.

View Source
// Property IDs for the DocumentSummaryInformation stream.
//
// The values follow the PIDDSI table of the [MS-OSHARED] specification.
// The sequence is not contiguous: 0x12 and 0x19 are not declared here, so
// PIDSharedDoc follows PIDCharCountWithSpaces at 0x13 and PIDContentType
// follows PIDDigSig at 0x1A. PIDHyperLinkBase through PIDDigSig occupy
// 0x14-0x18.
const (
	PIDCategory            = 0x02
	PIDPresentationFormat  = 0x03
	PIDByteCount           = 0x04
	PIDLineCount           = 0x05
	PIDParaCount           = 0x06
	PIDSlideCount          = 0x07
	PIDNoteCount           = 0x08
	PIDHiddenCount         = 0x09
	PIDMMClipCount         = 0x0A
	PIDScale               = 0x0B
	PIDHeadingPairs        = 0x0C
	PIDDocParts            = 0x0D
	PIDManager             = 0x0E
	PIDCompany             = 0x0F
	PIDLinksUpToDate       = 0x10
	PIDCharCountWithSpaces = 0x11
	PIDSharedDoc           = 0x13
	PIDHyperLinkBase       = 0x14
	PIDHyperLinks          = 0x15
	PIDHyperLinksChanged   = 0x16
	PIDVersion             = 0x17
	PIDDigSig              = 0x18
	PIDContentType         = 0x1A
	PIDContentStatus       = 0x1B
	PIDLanguage            = 0x1C
	PIDDocVersion          = 0x1D
)

Property IDs for the DocumentSummaryInformation stream.

Variables

This section is empty.

Functions

This section is empty.

Types

type DocumentMetadata

// DocumentMetadata holds the metadata values extracted from a document.
// A value of this type is what MetadataExtractor.ExtractMetadata returns.
type DocumentMetadata struct {
	// Core properties. Title through Security (CharCountWithSpaces aside)
	// have corresponding PIDs in the SummaryInformation constant group.
	Title               string    // Document title
	Subject             string    // Document subject
	Author              string    // Document author
	Keywords            string    // Document keywords
	Comments            string    // Document comments
	Template            string    // Template name
	LastAuthor          string    // Last saved by
	RevisionNumber      string    // Revision number
	ApplicationName     string    // Creating application
	Created             time.Time // Creation time
	LastSaved           time.Time // Last saved time
	LastPrinted         time.Time // Last printed time
	TotalEditTime       int64     // Total editing time in minutes
	PageCount           int32     // Number of pages
	WordCount           int32     // Number of words
	CharCount           int32     // Number of characters
	CharCountWithSpaces int32     // Number of characters with spaces
	Security            int32     // Security flags
	// NOTE(review): CharCountWithSpaces above and Category through
	// MultimediaClipCount below correspond to PIDs declared in the
	// DocumentSummaryInformation constant group, not the SummaryInformation
	// one — confirm which stream actually populates them.
	Category            string    // Document category
	PresentationFormat  string    // Presentation format
	ByteCount           int64     // Number of bytes
	LineCount           int32     // Number of lines
	ParagraphCount      int32     // Number of paragraphs
	SlideCount          int32     // Number of slides
	NoteCount           int32     // Number of notes
	HiddenSlideCount    int32     // Number of hidden slides
	MultimediaClipCount int32     // Number of multimedia clips

	// Document summary properties
	Company          string                 // Company name
	Manager          string                 // Manager name
	Language         int32                  // Document language code; presumably what GetLanguageName translates — confirm
	DocumentVersion  string                 // Document version
	ContentType      string                 // Content type
	ContentStatus    string                 // Content status
	HyperLinkBase    string                 // Hyperlink base
	CustomProperties map[string]interface{} // Custom properties, string-keyed with values of arbitrary type

	// Extended properties
	ThumbnailClipboardFormat int32  // Thumbnail format
	ThumbnailData            []byte // Thumbnail image data

	// Security and protection
	// NOTE(review): presumably these flags feed IsProtected — verify against
	// its implementation.
	ReadOnlyRecommended      bool // Read-only recommended
	WriteReservationPassword bool // Write reservation password set
	ReadOnlyPassword         bool // Read-only password set
}

DocumentMetadata holds comprehensive document metadata information.

func (*DocumentMetadata) GetLanguageName

func (metadata *DocumentMetadata) GetLanguageName() string

GetLanguageName returns the human-readable language name for a language code.

func (*DocumentMetadata) IsProtected

func (metadata *DocumentMetadata) IsProtected() bool

IsProtected returns true if the document has any protection enabled.

type MetadataExtractor

type MetadataExtractor struct {
	// contains filtered or unexported fields
}

MetadataExtractor handles extraction of metadata from .doc files.

func NewMetadataExtractor

func NewMetadataExtractor(reader *ole2.Reader) *MetadataExtractor

NewMetadataExtractor creates a new metadata extractor.

func (*MetadataExtractor) ExtractMetadata

func (me *MetadataExtractor) ExtractMetadata() (*DocumentMetadata, error)

ExtractMetadata extracts complete metadata from the document.

type PropertyType

// PropertyType is the 16-bit code identifying the data type of a property
// value. Each named PropertyType constant records its VT_* name in a
// trailing comment.
type PropertyType uint16

PropertyType represents the data type of a property.

// Known PropertyType codes, listed in ascending numeric order. The trailing
// comment on each constant gives the matching VT_* name.
const (
	// Value-less types.
	PropertyTypeEmpty PropertyType = 0x0000 // VT_EMPTY
	PropertyTypeNull  PropertyType = 0x0001 // VT_NULL

	// Basic scalar types (0x0002-0x000C).
	PropertyTypeInt16    PropertyType = 0x0002 // VT_I2
	PropertyTypeInt32    PropertyType = 0x0003 // VT_I4
	PropertyTypeFloat    PropertyType = 0x0004 // VT_R4
	PropertyTypeDouble   PropertyType = 0x0005 // VT_R8
	PropertyTypeCurrency PropertyType = 0x0006 // VT_CY
	PropertyTypeDate     PropertyType = 0x0007 // VT_DATE
	PropertyTypeString   PropertyType = 0x0008 // VT_BSTR
	PropertyTypeBoolean  PropertyType = 0x000B // VT_BOOL
	PropertyTypeVariant  PropertyType = 0x000C // VT_VARIANT

	// Additional integer widths (0x0010-0x0015).
	PropertyTypeInt8   PropertyType = 0x0010 // VT_I1
	PropertyTypeUInt8  PropertyType = 0x0011 // VT_UI1
	PropertyTypeUInt16 PropertyType = 0x0012 // VT_UI2
	PropertyTypeUInt32 PropertyType = 0x0013 // VT_UI4
	PropertyTypeInt64  PropertyType = 0x0014 // VT_I8
	PropertyTypeUInt64 PropertyType = 0x0015 // VT_UI8

	// LPSTR / LPWSTR strings.
	PropertyTypeStringA PropertyType = 0x001E // VT_LPSTR
	PropertyTypeStringW PropertyType = 0x001F // VT_LPWSTR

	// File times and binary payloads.
	PropertyTypeFileTime      PropertyType = 0x0040 // VT_FILETIME
	PropertyTypeBlob          PropertyType = 0x0041 // VT_BLOB
	PropertyTypeClipboardData PropertyType = 0x0047 // VT_CF
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL