776 lines
22 KiB
PHP
776 lines
22 KiB
PHP
<?php
|
|
/****************************************************
 * The following MIME sniffing class is based
 * on the specification published at the following
 * URL: http://mimesniff.spec.whatwg.org/
 *
 * To use this class, just pass the filename or
 * a file resource to the constructor - the class
 * will then examine the file and find the MIME
 * type.
 *
 * The Public API:
 *  - get_type:   Gets the determined MIME type,
 *                returns application/octet-stream
 *                if the MIME type is unknown.
 *  - is_empty:   Whether or not the file is empty
 *  - is_text:    Whether or not the file is plain
 *                text (UTF-8 or not)
 *  - is_font:    Whether the file is a font file
 *  - is_zip:     Whether the file is zipped
 *  - is_archive: Whether the file is an archive
 *  - is_scriptable:
 *                Determines whether the selected
 *                MIME type may contain executable
 *                scripts
 ***************************************************/
|
|
/**
 * Sniffs the MIME type of a file from (at most) its first 512 bytes.
 *
 * Pass a filename or an open stream resource to the constructor; the type is
 * determined immediately and can then be queried through the public API
 * (get_type, is_empty, is_text, is_font, is_zip, is_archive, is_scriptable).
 *
 * Signature tables are built once and shared between instances through the
 * static properties. Each table entry is an array with the keys:
 *   'mime'    => MIME type reported on a match,
 *   'pattern' => bytes expected at the start of the header,
 *   'mask'    => bytes ANDed with the header before comparing (same length
 *                as 'pattern'; "\x00" makes a position a wildcard),
 *   'ignore'  => set of leading bytes to skip before matching ('' = none).
 */
class MimeReader {

	protected $file = null, $detected_type = null, $header = null;
	protected static $binary_characters = '', $whitespace_characters = '', $tag_terminating_characters = '';
	protected static $image = null, $media = null, $fonts = null, $archive = null, $text = null, $others = null, $unknown = null, $html = null;

	/**
	 * Reads the header of $file and determines its MIME type.
	 *
	 * @param string|resource $file Path of the file to inspect, or an already
	 *                              opened stream resource.
	 */
	public function __construct( $file ) {
		$this->file = $file;

		self::init_signatures();

		$this->read_resource_header();
		$this->detect_type();
	}

	/* Signature table set-up */

	/**
	 * Builds one signature table entry.
	 *
	 * @param string      $mime    MIME type reported on a match.
	 * @param string      $pattern Expected (masked) bytes.
	 * @param string|null $mask    Byte mask; null means "compare every byte exactly".
	 * @param string      $ignore  Leading bytes that may be skipped before matching.
	 * @return array
	 */
	protected static function signature( $mime, $pattern, $mask = null, $ignore = '' ) {
		if ( null === $mask ) {
			$mask = str_repeat( "\xFF", strlen( $pattern ) );
		}

		return array(
			'mime'    => $mime,
			'pattern' => $pattern,
			'mask'    => $mask,
			'ignore'  => $ignore,
		);
	}

	/**
	 * Builds a text/html signature for an upper-case tag prefix such as "<BODY".
	 *
	 * Letters get the mask byte 0xDF, which clears the ASCII case bit, so the
	 * tag is matched case-insensitively; every other byte is compared exactly.
	 * Leading whitespace in the header is ignored.
	 *
	 * @param string $tag Upper-case tag prefix.
	 * @return array
	 */
	protected static function html_signature( $tag ) {
		$mask   = '';
		$length = strlen( $tag );
		for ( $i = 0; $i < $length; $i++ ) {
			$is_letter = ( $tag[ $i ] >= 'A' && $tag[ $i ] <= 'Z' );
			$mask     .= $is_letter ? "\xDF" : "\xFF";
		}

		return self::signature( 'text/html', $tag, $mask, self::$whitespace_characters );
	}

	/**
	 * Lazily fills the shared character sets and signature tables.
	 *
	 * Anything that is already populated is left untouched.
	 */
	protected static function init_signatures() {
		if ( empty( self::$binary_characters ) ) {
			// Control bytes that never occur in text: 0x00-0x08, 0x0B, 0x0E-0x1A, 0x1C-0x1F.
			self::$binary_characters  = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0E\x0F\x10\x11";
			self::$binary_characters .= "\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1C\x1D\x1E\x1F";
		}
		if ( empty( self::$whitespace_characters ) ) {
			// TAB, LF, FF, CR, SPACE.
			self::$whitespace_characters = "\x09\x0A\x0C\x0D\x20";
		}
		if ( empty( self::$tag_terminating_characters ) ) {
			// SPACE and ">".
			self::$tag_terminating_characters = "\x20\x3E";
		}

		$any4 = "\x00\x00\x00\x00"; // four wildcard bytes (pattern and mask are both zero)
		$all4 = "\xFF\xFF\xFF\xFF"; // four exactly-compared bytes

		if ( is_null( self::$image ) ) {
			self::$image = array(
				// Windows Icon
				self::signature( 'image/vnd.microsoft.icon', "\x00\x00\x01\x00" ),
				// "BM" - BMP signature
				self::signature( 'image/bmp', 'BM' ),
				// "GIF87a" / "GIF89a" - GIF signatures
				self::signature( 'image/gif', 'GIF87a' ),
				self::signature( 'image/gif', 'GIF89a' ),
				// "RIFF" followed by 4 bytes followed by "WEBPVP"
				self::signature( 'image/webp', 'RIFF' . $any4 . 'WEBPVP', $all4 . $any4 . str_repeat( "\xFF", 6 ) ),
				// 0x89 "PNG" CR LF SUB LF - PNG signature
				self::signature( 'image/png', "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A" ),
				// JPEG start of image marker followed by another marker
				self::signature( 'image/jpeg', "\xFF\xD8\xFF" ),
				// "8BPS" - PSD signature
				self::signature( 'application/psd', '8BPS' ),
			);
		}

		if ( is_null( self::$media ) ) {
			self::$media = array(
				// The WebM signature
				self::signature( 'video/webm', "\x1A\x45\xDF\xA3" ),
				// ".snd" signature
				self::signature( 'audio/basic', '.snd' ),
				// "FORM" followed by 4 bytes followed by "AIFF" - the AIFF signature
				self::signature( 'audio/aiff', 'FORM' . $any4 . 'AIFF', $all4 . $any4 . $all4 ),
				// MP3 without ID3 tag (marked UNTESTED by the original author)
				self::signature( 'audio/mpeg', "\xFF\xFB" ),
				// "ID3" - the ID3v2-tagged MP3 signature
				self::signature( 'audio/mpeg', 'ID3' ),
				// "OggS" followed by NUL - the OGG signature
				self::signature( 'application/ogg', 'OggS' . "\x00" ),
				// "MThd" followed by the 32-bit big-endian number 6 - MIDI signature
				self::signature( 'audio/midi', 'MThd' . "\x00\x00\x00\x06" ),
				// "RIFF" followed by 4 bytes followed by "AVI " - AVI signature
				self::signature( 'video/avi', 'RIFF' . $any4 . 'AVI ', $all4 . $any4 . $all4 ),
				// "RIFF" followed by 4 bytes followed by "WAVE" - WAVE signature
				self::signature( 'audio/wave', 'RIFF' . $any4 . 'WAVE', $all4 . $any4 . $all4 ),
			);
		}

		if ( is_null( self::$fonts ) ) {
			self::$fonts = array(
				// 34 arbitrary bytes followed by "LP" - Embedded OpenType signature
				self::signature(
					'application/vnd.ms-fontobject',
					str_repeat( "\x00", 34 ) . 'LP',
					str_repeat( "\x00", 34 ) . "\xFF\xFF"
				),
				// 4 bytes representing version 1.0 of a TrueType font
				self::signature( 'application/font-ttf', "\x00\x01\x00\x00" ),
				// "OTTO" - OpenType signature (a.k.a. application/vnd.ms-opentype)
				self::signature( 'application/font-off', 'OTTO' ),
				// "ttcf" - TrueType Collection signature
				self::signature( 'application/x-font-truetype-collection', 'ttcf' ),
				// "wOFF" - Web Open Font Format signature
				self::signature( 'application/font-woff', 'wOFF' ),
			);
		}

		if ( is_null( self::$archive ) ) {
			self::$archive = array(
				// GZIP signature
				self::signature( 'application/x-gzip', "\x1F\x8B\x08" ),
				// "PK" followed by ETX, EOT - ZIP signature
				self::signature( 'application/zip', 'PK' . "\x03\x04" ),
				// "Rar " followed by SUB, BEL, NUL - RAR signature
				self::signature( 'application/x-rar-compressed', 'Rar ' . "\x1A\x07\x00" ),
			);
		}

		if ( is_null( self::$text ) ) {
			self::$text = array(
				// "%!PS-Adobe-" - PostScript signature
				self::signature( 'application/postscript', '%!PS-Adobe-' ),
				// UTF-16 Little Endian BOM
				self::signature( 'text/plain', "\xFF\xFE" ),
				// UTF-16 Big Endian BOM
				self::signature( 'text/plain', "\xFE\xFF" ),
				// UTF-8 BOM
				self::signature( 'text/plain', "\xEF\xBB\xBF" ),
			);
		}

		if ( is_null( self::$others ) ) {
			self::$others = array(
				// "MZ" - DOS/Windows executable
				self::signature( 'WINDOWS EXECUTABLE', 'MZ' ),
				// 0x7F "ELF" - ELF binary
				self::signature( 'EXEC_LINKABLE', "\x7F" . 'ELF' ),
			);
		}

		if ( is_null( self::$unknown ) ) {
			$unknown = array();

			// HTML tag prefixes, tried in this order. Each must be followed by a
			// tag-terminating byte (see html_match_pattern).
			$html_tags = array(
				'<!DOCTYPE HTML', '<HTML', '<HEAD', '<SCRIPT', '<IFRAME', '<H1',
				'<DIV', '<FONT', '<TABLE', '<A', '<STYLE', '<TITLE', '<B',
				'<BODY', '<BR', '<P', '<!--',
			);
			foreach ( $html_tags as $tag ) {
				$unknown[] = self::html_signature( $tag );
			}

			// "<?xml" - XML declaration, leading whitespace ignored
			$unknown[] = self::signature( 'text/xml', '<?xml', null, self::$whitespace_characters );
			// "%PDF" - PDF signature
			$unknown[] = self::signature( 'application/pdf', '%PDF' );
			// "PK" followed by 18 arbitrary bytes - Microsoft Office Open XML signature
			$unknown[] = self::signature(
				'application/docx',
				'PK' . str_repeat( "\x00", 18 ),
				"\xFF\xFF" . str_repeat( "\x00", 18 )
			);

			self::$unknown = $unknown;
		}
	}

	/* The public API */

	/**
	 * @return bool True when the inspected file contained no bytes.
	 */
	public function is_empty() {
		return 'inode/x-empty' === $this->detected_type;
	}

	/**
	 * @return bool True when the file was detected as plain text or PostScript.
	 */
	public function is_text() {
		return in_array( $this->detected_type, array( 'application/postscript', 'text/plain' ), true );
	}

	/**
	 * @return bool True when the detected type is one of the known font types.
	 */
	public function is_font() {
		$font_types = array(
			// Types produced by the font signature table.
			'application/vnd.ms-fontobject',
			'application/font-ttf',
			'application/font-off',
			'application/x-font-truetype-collection',
			'application/font-woff',
			// Further aliases, never produced by the built-in tables.
			'application/font-cff',
			'application/font-otf',
			'application/font-sfnt',
			'application/vnd.ms-opentype',
			// Misspelt aliases accepted by earlier versions; kept for compatibility.
			'application/font-sntf',
			'application/vds.ms-opentype',
		);

		return in_array( $this->detected_type, $font_types, true );
	}

	/**
	 * @return bool True for application/zip and any "...+zip" type.
	 */
	public function is_zip() {
		if ( 'application/zip' === $this->detected_type ) {
			return true;
		}

		return '+zip' === substr( (string) $this->detected_type, -4 );
	}

	/**
	 * @return bool True for RAR, ZIP and GZIP archives.
	 */
	public function is_archive() {
		$archive_types = array(
			'application/x-rar-compressed',
			'application/zip',
			'application/x-gzip',
		);

		return in_array( $this->detected_type, $archive_types, true );
	}

	/**
	 * @return bool True when the detected type may carry executable scripts
	 *              (HTML, PDF, PostScript).
	 */
	public function is_scriptable() {
		$scriptable_types = array(
			'text/html',
			'application/pdf',
			'application/postscript',
		);

		return in_array( $this->detected_type, $scriptable_types, true );
	}

	/**
	 * @return string The detected MIME type, or application/octet-stream when
	 *                no signature matched.
	 */
	public function get_type() {
		if ( null === $this->detected_type ) {
			return 'application/octet-stream';
		}

		return $this->detected_type;
	}

	/* Helper functions. Execution is passed to detect_type once the constructor has set up the data. */

	/**
	 * Loads up to the first 512 bytes of the file into $this->header.
	 *
	 * For a stream resource the current position is remembered and restored
	 * after reading. If the file cannot be opened or read, the header is left
	 * as an empty string (and the file is consequently reported as empty).
	 */
	protected function read_resource_header() {
		$data = '';

		if ( is_string( $this->file ) ) {
			// Binary mode so no newline translation happens on any platform.
			$handle = fopen( $this->file, 'rb' );
			if ( false !== $handle ) {
				$data = fread( $handle, 512 );
				fclose( $handle );
			}
		} elseif ( is_resource( $this->file ) ) {
			// The stream may not be at its start: rewind, read, then restore.
			$position = ftell( $this->file );
			if ( false !== $position ) {
				fseek( $this->file, 0, SEEK_SET );
			}
			$data = fread( $this->file, 512 );
			if ( false !== $position ) {
				fseek( $this->file, $position, SEEK_SET );
			}
		}

		$this->header = is_string( $data ) ? $data : '';
	}

	/**
	 * Matches a masked pattern against the start of the header.
	 *
	 * @param string $pattern Expected bytes after masking.
	 * @param string $mask    Mask ANDed with the header bytes; same length as $pattern.
	 * @param string $ignore  Leading header bytes to skip before matching.
	 * @return int|false Offset of the first header byte after the matched
	 *                   pattern, or false when there is no match.
	 */
	protected function match_signature( $pattern, $mask, $ignore ) {
		$pattern_len = strlen( (string) $pattern );
		if ( 0 === $pattern_len || strlen( (string) $mask ) !== $pattern_len ) {
			return false;
		}

		$header = (string) $this->header;
		$start  = empty( $ignore ) ? 0 : strspn( $header, $ignore );

		// Not enough bytes left to hold the whole pattern.
		if ( strlen( $header ) - $start < $pattern_len ) {
			return false;
		}

		// "&" on two strings of equal length is a byte-wise AND.
		$masked = substr( $header, $start, $pattern_len ) & $mask;
		if ( $masked !== $pattern ) {
			return false;
		}

		return $start + $pattern_len;
	}

	/**
	 * @param string $pattern Expected bytes after masking.
	 * @param string $mask    Mask ANDed with the header bytes.
	 * @param string $ignore  Leading header bytes to skip before matching.
	 * @return bool Whether the header starts with the masked pattern.
	 */
	protected function match_pattern( $pattern, $mask, $ignore ) {
		return false !== $this->match_signature( $pattern, $mask, $ignore );
	}

	/**
	 * Like match_pattern, but the byte following the pattern must be a
	 * tag-terminating character (space or ">").
	 *
	 * @param string $pattern Expected bytes after masking.
	 * @param string $mask    Mask ANDed with the header bytes.
	 * @param string $ignore  Leading header bytes to skip before matching.
	 * @return bool
	 */
	protected function html_match_pattern( $pattern, $mask, $ignore ) {
		$end = $this->match_signature( $pattern, $mask, $ignore );
		if ( false === $end || $end >= strlen( $this->header ) ) {
			return false;
		}

		// strpos() may legitimately return 0, hence the strict comparison.
		return false !== strpos( self::$tag_terminating_characters, $this->header[ $end ] );
	}

	/**
	 * Runs the sniffers in priority order; the first one that matches sets
	 * $this->detected_type. When none matches it stays null.
	 */
	protected function detect_type() {
		if ( $this->sniff_empty() ) return;
		if ( $this->sniff_images() ) return;
		if ( $this->sniff_media() ) return;
		if ( $this->sniff_fonts() ) return;
		if ( $this->sniff_archive() ) return;
		if ( $this->sniff_text() ) return;
		if ( $this->sniff_unknown() ) return;
		if ( $this->sniff_others() ) return;
		if ( $this->sniff_plain_text() ) return;
	}

	/* Sniffer functions */

	/**
	 * Tries every entry of a signature table in order and records the MIME
	 * type of the first match.
	 *
	 * @param array $signatures Signature table.
	 * @param bool  $html_aware When true, text/html entries are matched with
	 *                          html_match_pattern instead of match_pattern.
	 * @return bool Whether a signature matched.
	 */
	protected function sniff_signatures( $signatures, $html_aware = false ) {
		foreach ( (array) $signatures as $signature ) {
			if ( $html_aware && 'text/html' === $signature['mime'] ) {
				$matched = $this->html_match_pattern( $signature['pattern'], $signature['mask'], $signature['ignore'] );
			} else {
				$matched = $this->match_pattern( $signature['pattern'], $signature['mask'], $signature['ignore'] );
			}

			if ( $matched ) {
				$this->detected_type = $signature['mime'];
				return true;
			}
		}

		return false;
	}

	protected function sniff_empty() {
		if ( 0 !== strlen( (string) $this->header ) ) {
			return false;
		}

		$this->detected_type = 'inode/x-empty';
		return true;
	}

	protected function sniff_images() {
		return $this->sniff_signatures( self::$image );
	}

	protected function sniff_media() {
		if ( $this->sniff_signatures( self::$media ) ) {
			return true;
		}

		// MP4 has no fixed prefix, so it gets a dedicated check.
		if ( $this->sniff_mp4() ) {
			$this->detected_type = 'video/mp4';
			return true;
		}

		return false;
	}

	/**
	 * Checks whether the header starts with an "ftyp" box that names an
	 * "mp4" brand, either as the major brand or among the compatible brands.
	 *
	 * @return bool
	 */
	protected function sniff_mp4() {
		$header     = (string) $this->header;
		$header_len = strlen( $header );

		if ( $header_len < 12 ) {
			return false;
		}

		// The first four bytes hold the box size as a big-endian 32-bit integer.
		$unpacked = unpack( 'N', substr( $header, 0, 4 ) );
		$box_size = $unpacked[1];

		if ( $header_len < $box_size ) return false;
		if ( $box_size % 4 ) return false;

		if ( 'ftyp' !== substr( $header, 4, 4 ) ) return false;
		// Major brand.
		if ( 'mp4' === substr( $header, 8, 3 ) ) return true;

		// Compatible brands start at offset 16, four bytes each.
		for ( $offset = 16; $offset < $box_size; $offset += 4 ) {
			if ( 'mp4' === substr( $header, $offset, 3 ) ) {
				return true;
			}
		}

		return false;
	}

	protected function sniff_fonts() {
		return $this->sniff_signatures( self::$fonts );
	}

	protected function sniff_archive() {
		return $this->sniff_signatures( self::$archive );
	}

	/**
	 * Matches the PostScript signature and the Unicode byte order marks.
	 *
	 * A byte order mark is decisive on its own: UTF-16 text routinely
	 * contains NUL bytes, so no binary-data check is applied here.
	 */
	protected function sniff_text() {
		return $this->sniff_signatures( self::$text );
	}

	protected function sniff_unknown() {
		return $this->sniff_signatures( self::$unknown, true );
	}

	protected function sniff_others() {
		return $this->sniff_signatures( self::$others );
	}

	/**
	 * Last resort: a header without any binary control bytes is plain text.
	 *
	 * @return bool
	 */
	protected function sniff_plain_text() {
		if ( $this->has_binary_data() ) {
			return false;
		}

		$this->detected_type = 'text/plain';
		return true;
	}

	/**
	 * @return bool Whether the header contains at least one byte from
	 *              self::$binary_characters.
	 */
	protected function has_binary_data() {
		$header = (string) $this->header;

		// strcspn() gives the length of the leading run free of binary bytes;
		// anything shorter than the header means a binary byte was found.
		return strcspn( $header, self::$binary_characters ) < strlen( $header );
	}

}
|