mirror of
https://github.com/JetSetIlly/Gopher2600.git
synced 2024-05-20 13:48:02 -04:00
fingerprinting limited to first cartridgeloader.FingerprintLimit bytes
looking beyond this limit is unlikely to reveal any data of value and it can only cause excess slowdown for very large files, which are unlikely to be cartridge files in any case
This commit is contained in:
parent
1850b03ff5
commit
16adef8e7b
|
@ -53,11 +53,41 @@
|
||||||
// A file extension of "BIN", "ROM", "A26" indicates that the data should be
|
// A file extension of "BIN", "ROM", "A26" indicates that the data should be
|
||||||
// fingerprinted as normal.
|
// fingerprinted as normal.
|
||||||
//
|
//
|
||||||
|
// # Preloaded data
|
||||||
|
//
|
||||||
|
// Cartridges with lots of data will be streamed off disk as required. For
|
||||||
|
// example, Moviecart or Supercharger audio tapes can be large and don't need to
|
||||||
|
// exist in memory for a very long time.
|
||||||
|
//
|
||||||
|
// However, for practical reasons the first 1MB of data of any file will be
|
||||||
|
// 'preloaded'. When reading cartridge data you don't need to worry about
|
||||||
|
// whether data has been preloaded or not, except that it does affect both
|
||||||
|
// hashing and fingerprinting.
|
||||||
|
//
|
||||||
// # Hashes
|
// # Hashes
|
||||||
//
|
//
|
||||||
// Creating a cartridge loader with NewLoaderFromFilename() or
|
// The creation of a cartridge loader includes the creation of both a SHA1 and
|
||||||
// NewLoaderFromData() will also create a SHA1 and MD5 hash of the data. The
|
// an MD5 hash. Hashes are useful for matching cartridges regardless of path
|
||||||
// amount of data used to create the has is limited to 1MB. For most cartridges
|
// or filename.
|
||||||
// this will mean the hash is taken using all the data but some cartridge are
|
//
|
||||||
// likely to have much more data than that.
|
// The data used to create the hash is limited to the data that has been
|
||||||
|
// preloaded (see above).
|
||||||
|
//
|
||||||
|
// # Fingerprinting
|
||||||
|
//
|
||||||
|
// Cartridge data can be checked for 'fingerprint' data that can be used to
|
||||||
|
// decide on the 'mapping' the cartridge uses. The three cartridge loader
|
||||||
|
// functions, Contains(), ContainsLimit() and Count() can be used to search the
|
||||||
|
// preloaded data (see above) for specific byte sequences.
|
||||||
|
//
|
||||||
|
// More complex fingerprinting can be done with the Read() function. However,
|
||||||
|
// because the Read() function works with the complete cartridge and not just
|
||||||
|
// the preloaded data, care should be taken not to read too much of the data for
|
||||||
|
// reasons of computation time. The constant value FingerprintLimit is provided
|
||||||
|
// as a useful value to which a Read() loop can be limited.
|
||||||
|
//
|
||||||
|
// Once fingerprinting has been completed it is very important to remember to
|
||||||
|
// reset the Read() position with the Seek() command:
|
||||||
|
//
|
||||||
|
// cartridgeloader.Seek(0, io.SeekStart)
|
||||||
package cartridgeloader
|
package cartridgeloader
|
||||||
|
|
|
@ -31,13 +31,20 @@ import (
|
||||||
"github.com/jetsetilly/gopher2600/archivefs"
|
"github.com/jetsetilly/gopher2600/archivefs"
|
||||||
)
|
)
|
||||||
|
|
||||||
// the maximum amount of data to load into the peep slice
|
// the maximum amount of data to preload
|
||||||
const maxPeepLength = 1048576
|
const maxPreloadLength = 1048576
|
||||||
|
|
||||||
// makes sures that data is capped at peep length. use this function when
|
// Fingerprinting beyond the first 64k or so of cartridge data can result in
|
||||||
// assigning to the Loader.peep field
|
// very slow fingerprinting, particularly if looking at a large file that is not a
|
||||||
func peepData(data []byte) []byte {
|
// cartridge file at all
|
||||||
return data[:min(len(data), maxPeepLength)]
|
//
|
||||||
|
// The 64k value is arbitrary but in practice it's a sufficiently large value and
|
||||||
|
// any data beyond that limit is unlikely to reveal anything of worth
|
||||||
|
const FingerprintLimit = 65536
|
||||||
|
|
||||||
|
// use this function when assigning to the Loader.preload field
|
||||||
|
func preloadLimit(data []byte) []byte {
|
||||||
|
return data[:min(len(data), maxPreloadLength)]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Loader abstracts all the ways data can be loaded into the emulation.
|
// Loader abstracts all the ways data can be loaded into the emulation.
|
||||||
|
@ -72,13 +79,15 @@ type Loader struct {
|
||||||
data io.ReadSeeker
|
data io.ReadSeeker
|
||||||
size int
|
size int
|
||||||
|
|
||||||
// peep is the data at the beginning of the cartridge data. it is used to
|
// preload is the data at the beginning of the cartridge data that has been
|
||||||
// help fingerprinting and for creating the SHA1 and MD5 hashes
|
// preloaded immediately on creation of the cartridge loader
|
||||||
//
|
//
|
||||||
// in reality, most cartridges are small enough to fit entirely inside the
|
// in reality, most cartridges are small enough to fit entirely inside the
|
||||||
// peep field. currently it is only moviecart data and supercharger sound
|
// preload field. currently it is only moviecart data and supercharger sound
|
||||||
// files that are ever arger than maxPeepLength
|
// files that are ever larger than that
|
||||||
peep []byte
|
//
|
||||||
|
// the preload data is used to create the hashes
|
||||||
|
preload []byte
|
||||||
|
|
||||||
// data was supplied through NewLoaderFromData()
|
// data was supplied through NewLoaderFromData()
|
||||||
embedded bool
|
embedded bool
|
||||||
|
@ -190,7 +199,7 @@ func NewLoaderFromData(name string, data []byte, mapping string) (Loader, error)
|
||||||
ld := Loader{
|
ld := Loader{
|
||||||
Filename: name,
|
Filename: name,
|
||||||
Mapping: mapping,
|
Mapping: mapping,
|
||||||
peep: peepData(data),
|
preload: preloadLimit(data),
|
||||||
data: bytes.NewReader(data),
|
data: bytes.NewReader(data),
|
||||||
HashSHA1: fmt.Sprintf("%x", sha1.Sum(data)),
|
HashSHA1: fmt.Sprintf("%x", sha1.Sum(data)),
|
||||||
HashMD5: fmt.Sprintf("%x", md5.Sum(data)),
|
HashMD5: fmt.Sprintf("%x", md5.Sum(data)),
|
||||||
|
@ -232,7 +241,7 @@ func (ld *Loader) Close() error {
|
||||||
}
|
}
|
||||||
ld.data = nil
|
ld.data = nil
|
||||||
ld.size = 0
|
ld.size = 0
|
||||||
ld.peep = nil
|
ld.preload = nil
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -258,21 +267,22 @@ func (ld Loader) Size() int {
|
||||||
return ld.size
|
return ld.size
|
||||||
}
|
}
|
||||||
|
|
||||||
// Contains returns true if subslice appears anywhere in the peep data
|
// Contains returns true if subslice appears anywhere in the preload data.
|
||||||
func (ld Loader) Contains(subslice []byte) bool {
|
func (ld Loader) Contains(subslice []byte) bool {
|
||||||
return bytes.Contains(ld.peep, subslice)
|
return bytes.Contains(ld.preload, subslice)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ContainsLimit returns true if subslice appears in the peep data at an offset between
|
// ContainsLimit returns true if subslice appears anywhere in the preload data and
|
||||||
// zero and limit
|
// within the byte limit value supplied as a function parameter.
|
||||||
func (ld Loader) ContainsLimit(limit int, subslice []byte) bool {
|
func (ld Loader) ContainsLimit(limit int, subslice []byte) bool {
|
||||||
limit = min(limit, ld.Size())
|
limit = min(limit, ld.Size())
|
||||||
return bytes.Contains(ld.peep[:limit], subslice)
|
return bytes.Contains(ld.preload[:limit], subslice)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count returns the number of non-overlapping instances of subslice in the peep data
|
// Count returns the number of non-overlapping instances of subslice in the
|
||||||
|
// preload data.
|
||||||
func (ld Loader) Count(subslice []byte) int {
|
func (ld Loader) Count(subslice []byte) int {
|
||||||
return bytes.Count(ld.peep, subslice)
|
return bytes.Count(ld.preload, subslice)
|
||||||
}
|
}
|
||||||
|
|
||||||
// open the cartridge data. filenames with a valid schema will use that method
|
// open the cartridge data. filenames with a valid schema will use that method
|
||||||
|
@ -303,7 +313,7 @@ func (ld *Loader) open() error {
|
||||||
|
|
||||||
ld.data = bytes.NewReader(data)
|
ld.data = bytes.NewReader(data)
|
||||||
ld.size = len(data)
|
ld.size = len(data)
|
||||||
ld.peep = peepData(data)
|
ld.preload = preloadLimit(data)
|
||||||
|
|
||||||
case "file":
|
case "file":
|
||||||
fallthrough
|
fallthrough
|
||||||
|
@ -314,8 +324,7 @@ func (ld *Loader) open() error {
|
||||||
return fmt.Errorf("loader: %w", err)
|
return fmt.Errorf("loader: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// peep at data
|
ld.preload, err = io.ReadAll(io.LimitReader(r, maxPreloadLength))
|
||||||
ld.peep, err = io.ReadAll(io.LimitReader(r, maxPeepLength))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("loader: %w", err)
|
return fmt.Errorf("loader: %w", err)
|
||||||
}
|
}
|
||||||
|
@ -328,8 +337,8 @@ func (ld *Loader) open() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// generate hashes
|
// generate hashes
|
||||||
ld.HashSHA1 = fmt.Sprintf("%x", sha1.Sum(ld.peep))
|
ld.HashSHA1 = fmt.Sprintf("%x", sha1.Sum(ld.preload))
|
||||||
ld.HashMD5 = fmt.Sprintf("%x", md5.Sum(ld.peep))
|
ld.HashMD5 = fmt.Sprintf("%x", md5.Sum(ld.preload))
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -75,7 +75,7 @@ func (cart *Cartridge) fingerprintPlusROM(loader cartridgeloader.Loader) bool {
|
||||||
b := make([]byte, 3)
|
b := make([]byte, 3)
|
||||||
loader.Seek(0, io.SeekStart)
|
loader.Seek(0, io.SeekStart)
|
||||||
|
|
||||||
for {
|
for i := 0; i < cartridgeloader.FingerprintLimit-len(b); i++ {
|
||||||
n, err := loader.Read(b)
|
n, err := loader.Read(b)
|
||||||
if n < len(b) {
|
if n < len(b) {
|
||||||
break
|
break
|
||||||
|
@ -148,7 +148,7 @@ func fingerprintMnetwork(loader cartridgeloader.Loader) bool {
|
||||||
b := make([]byte, 3)
|
b := make([]byte, 3)
|
||||||
loader.Seek(0, io.SeekStart)
|
loader.Seek(0, io.SeekStart)
|
||||||
|
|
||||||
for {
|
for i := 0; i < cartridgeloader.FingerprintLimit-len(b); i++ {
|
||||||
n, err := loader.Read(b)
|
n, err := loader.Read(b)
|
||||||
if n < len(b) {
|
if n < len(b) {
|
||||||
break
|
break
|
||||||
|
@ -277,7 +277,7 @@ func fingerprintCDF(loader cartridgeloader.Loader) (bool, string) {
|
||||||
b := make([]byte, 4)
|
b := make([]byte, 4)
|
||||||
loader.Seek(0, io.SeekStart)
|
loader.Seek(0, io.SeekStart)
|
||||||
|
|
||||||
for {
|
for i := 0; i < cartridgeloader.FingerprintLimit-len(b); i++ {
|
||||||
n, err := loader.Read(b)
|
n, err := loader.Read(b)
|
||||||
if n < len(b) {
|
if n < len(b) {
|
||||||
break
|
break
|
||||||
|
|
|
@ -75,9 +75,7 @@ func Fingerprint(port plugging.PortID, loader cartridgeloader.Loader) ports.NewP
|
||||||
|
|
||||||
func matchPattern(patterns [][]byte, loader cartridgeloader.Loader) bool {
|
func matchPattern(patterns [][]byte, loader cartridgeloader.Loader) bool {
|
||||||
for _, p := range patterns {
|
for _, p := range patterns {
|
||||||
// limit check to the first 64k of data. any data beyond that is likely
|
if loader.ContainsLimit(cartridgeloader.FingerprintLimit, p) {
|
||||||
// to be non-program data and only likely to return false-positives
|
|
||||||
if loader.ContainsLimit(65536, p) {
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue