ref: bcf618b256c101e9f4a47af77541a559bb8bb44d
parent: 40cb1dc3ac2c152593703d4f534a8de5af19ae1e
author: Werner Lemberg <[email protected]>
date: Mon Nov 2 01:50:49 EST 2015
[ftfuzzer] Add support for multiple files (patch #8779). Currently, libFuzzer only supports mutation of a single file. We circumvent this problem by using an uncompressed tar archive as multiple-file input for the fuzzer. This patch enables tests of `FT_Attach_Stream' and AFM/PFM parsing; a constructed tarball should contain a font file as the first element, and files to be attached as further elements. * src/tools/ftfuzzer/ftfuzzer.cc: Include libarchive headers. (archive_read_entry_data, parse_data): New functions. (LLVMFuzzerTestOneInput): Updated. * src/tools/ftfuzzer/ftmutator.cc: New file, providing a custom mutator for libFuzzer that can mutate tarballs in a sensible way.
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2015-11-02 Bungeman <[email protected]>
+
+ [ftfuzzer] Add support for multiple files (patch #8779).
+
+ Currently, libFuzzer only supports mutation of a single file. We
+ circumvent this problem by using an uncompressed tar archive as
+ multiple-file input for the fuzzer.
+
+ This patch enables tests of `FT_Attach_Stream' and AFM/PFM parsing;
+ a constructed tarball should contain a font file as the first
+ element, and files to be attached as further elements.
+
+ * src/tools/ftfuzzer/ftfuzzer.cc: Include libarchive headers.
+ (archive_read_entry_data, parse_data): New functions.
+ (LLVMFuzzerTestOneInput): Updated.
+
+ * src/tools/ftfuzzer/ftmutator.cc: New file, providing a custom
+ mutator for libFuzzer that can mutate tarballs in a sensible way.
+
2015-10-31 Werner Lemberg <[email protected]>
[sfnt] Fix cmap 14 validation (#46346).
--- a/src/tools/ftfuzzer/ftfuzzer.cc
+++ b/src/tools/ftfuzzer/ftfuzzer.cc
@@ -1,8 +1,11 @@
-// we use `unique_ptr' and `decltype', defined since C++11
+// we use `unique_ptr', `decltype', and other gimmicks defined since C++11
#if __cplusplus < 201103L
# error "a C++11 compiler is needed"
#endif
+#include <archive.h>
+#include <archive_entry.h>
+
#include <assert.h>
#include <stdint.h>
@@ -10,7 +13,7 @@
#include <vector>
-using namespace std;
+ using namespace std;
#include <ft2build.h>
@@ -34,6 +37,7 @@
static FT_Library library;
static int InitResult;
+
struct FT_Global {
FT_Global() {
InitResult = FT_Init_FreeType( &library );
@@ -46,6 +50,81 @@
FT_Global global_ft;
+ static int
+ archive_read_entry_data( struct archive *ar,
+ vector<FT_Byte> *vw )
+ {
+ int r;
+ const FT_Byte* buff;
+ size_t size;
+ int64_t offset;
+
+ for (;;)
+ {
+ r = archive_read_data_block( ar,
+ reinterpret_cast<const void**>( &buff ),
+ &size,
+ &offset );
+ if ( r == ARCHIVE_EOF )
+ return ARCHIVE_OK;
+ if ( r != ARCHIVE_OK )
+ return r;
+
+ vw->insert( vw->end(), buff, buff + size );
+ }
+ }
+
+
+ static vector<vector<FT_Byte>>
+ parse_data( const uint8_t* data,
+ size_t size )
+ {
+ struct archive_entry* entry;
+ int r;
+ vector<vector<FT_Byte>> files;
+
+ unique_ptr<struct archive,
+ decltype ( archive_read_free )*> a( archive_read_new(),
+ archive_read_free );
+
+ // activate reading of uncompressed tar archives
+ archive_read_support_format_tar( a.get() );
+
+ // the need for `const_cast' was removed with libarchive commit be4d4dd
+ if ( !( r = archive_read_open_memory(
+ a.get(),
+ const_cast<void*>(static_cast<const void*>( data ) ),
+ size ) ) )
+ {
+ unique_ptr<struct archive,
+ decltype ( archive_read_close )*> a_open( a.get(),
+ archive_read_close );
+
+ // read files contained in archive
+ for (;;)
+ {
+ r = archive_read_next_header( a_open.get(), &entry );
+ if ( r == ARCHIVE_EOF )
+ break;
+ if ( r != ARCHIVE_OK )
+ break;
+
+ vector<FT_Byte> entry_data;
+ r = archive_read_entry_data( a.get(), &entry_data );
+ if ( r != ARCHIVE_OK )
+ break;
+
+ files.push_back( move( entry_data ) );
+ }
+ }
+
+ if ( files.size() == 0 )
+ files.emplace_back( data, data + size );
+
+ return files;
+ }
+
+
static void
setIntermediateAxis( FT_Face face )
{
@@ -74,6 +153,7 @@
}
+ // the interface function to the libFuzzer library
extern "C" int
LLVMFuzzerTestOneInput( const uint8_t* data,
size_t size_ )
@@ -83,7 +163,7 @@
if ( size_ < 1 )
return 0;
- long size = (long)size_;
+ const vector<vector<FT_Byte>>& files = parse_data( data, size_ );
FT_Face face;
FT_Int32 load_flags = FT_LOAD_DEFAULT;
@@ -99,7 +179,11 @@
// more than a single font.
// get number of faces
- if ( FT_New_Memory_Face( library, data, size, -1, &face ) )
+ if ( FT_New_Memory_Face( library,
+ files[0].data(),
+ (FT_Long)files[0].size(),
+ -1,
+ &face ) )
return 0;
long num_faces = face->num_faces;
FT_Done_Face( face );
@@ -111,8 +195,8 @@
{
// get number of instances
if ( FT_New_Memory_Face( library,
- data,
- size,
+ files[0].data(),
+ (FT_Long)files[0].size(),
-( face_index + 1 ),
&face ) )
continue;
@@ -125,12 +209,29 @@
instance_index++ )
{
if ( FT_New_Memory_Face( library,
- data,
- size,
+ files[0].data(),
+ (FT_Long)files[0].size(),
( instance_index << 16 ) + face_index,
&face ) )
continue;
+ // if we have more than a single input file coming from an archive,
+ // attach them (starting with the second file) using the order given
+ // in the archive
+ for ( size_t files_index = 1;
+ files_index < files.size();
+ files_index++ )
+ {
+ FT_Open_Args open_args = {};
+ open_args.flags = FT_OPEN_MEMORY;
+ open_args.memory_base = files[files_index].data();
+ open_args.memory_size = (FT_Long)files[files_index].size();
+
+ // the last archive element will be eventually used as the
+ // attachment
+ FT_Attach_Stream( face, &open_args );
+ }
+
// loop over all bitmap stroke sizes
// and an arbitrary size for outlines
for ( long fixed_sizes_index = 0;
@@ -192,4 +293,4 @@
}
-/* END */
+// END
--- /dev/null
+++ b/src/tools/ftfuzzer/ftmutator.cc
@@ -1,0 +1,301 @@
+// A custom fuzzer mutator for FreeType.
+//
+// Since `tar' is not a valid format for input to FreeType, treat any input
+// that looks like `tar' as multiple files and mutate them separately.
+//
+// In the future, a variation of this may be used to guide mutation on a
+// logically higher level.
+
+// we use `unique_ptr', `decltype', and other gimmicks defined since C++11
+#if __cplusplus < 201103L
+# error "a C++11 compiler is needed"
+#endif
+
+#include <cstdint>
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstddef>
+#include <cstring>
+#include <iostream>
+
+#include <memory>
+#include <vector>
+
+#include <archive.h>
+#include <archive_entry.h>
+
+#include "FuzzerInterface.h"
+
+
+ using namespace std;
+
+
+ // This function should be defined by `ftfuzzer.cc'.
+ extern "C" int
+ LLVMFuzzerTestOneInput( const uint8_t* Data,
+ size_t Size );
+
+
+ static void
+ check_result( struct archive* a,
+ int r )
+ {
+ if ( r == ARCHIVE_OK )
+ return;
+
+ const char* m = archive_error_string( a );
+ write( 1, m, strlen( m ) );
+ exit( 1 );
+ }
+
+
+ static int
+ archive_read_entry_data( struct archive *ar,
+ vector<uint8_t> *vw )
+ {
+ int r;
+ const uint8_t* buff;
+ size_t size;
+ int64_t offset;
+
+ for (;;)
+ {
+ r = archive_read_data_block( ar,
+ reinterpret_cast<const void**>( &buff ),
+ &size,
+ &offset );
+ if ( r == ARCHIVE_EOF )
+ return ARCHIVE_OK;
+ if ( r != ARCHIVE_OK )
+ return r;
+
+ vw->insert( vw->end(), buff, buff + size );
+ }
+ }
+
+
+ static vector<vector<uint8_t>>
+ parse_data( const uint8_t* data,
+ size_t size )
+ {
+ struct archive_entry* entry;
+ int r;
+ vector<vector<uint8_t>> files;
+
+ unique_ptr<struct archive,
+ decltype ( archive_read_free )*> a( archive_read_new(),
+ archive_read_free );
+
+ // activate reading of uncompressed tar archives
+ archive_read_support_format_tar( a.get() );
+
+ // the need for `const_cast' was removed with libarchive commit be4d4dd
+ if ( !( r = archive_read_open_memory(
+ a.get(),
+ const_cast<void*>(static_cast<const void*>( data ) ),
+ size ) ) )
+ {
+ unique_ptr<struct archive,
+ decltype ( archive_read_close )*> a_open( a.get(),
+ archive_read_close );
+
+ // read files contained in archive
+ for (;;)
+ {
+ r = archive_read_next_header( a_open.get(), &entry );
+ if ( r == ARCHIVE_EOF )
+ break;
+ if ( r != ARCHIVE_OK )
+ break;
+
+ vector<uint8_t> entry_data;
+ r = archive_read_entry_data( a.get(), &entry_data );
+ if ( entry_data.size() == 0 )
+ continue;
+
+ files.push_back( move( entry_data ) );
+ if ( r != ARCHIVE_OK )
+ break;
+ }
+ }
+
+ return files;
+ }
+
+
+ class FTFuzzer
+ : public fuzzer::UserSuppliedFuzzer
+ {
+
+ public:
+ FTFuzzer( fuzzer::FuzzerRandomBase* Rand )
+ : fuzzer::UserSuppliedFuzzer( Rand ) {}
+
+
+ int
+ TargetFunction( const uint8_t* Data,
+ size_t Size )
+ {
+ return LLVMFuzzerTestOneInput( Data, Size );
+ }
+
+
+ // Custom mutator.
+ virtual size_t
+ Mutate( uint8_t* Data,
+ size_t Size,
+ size_t MaxSize )
+ {
+ vector<vector<uint8_t>> files = parse_data( Data, Size );
+
+ // If the file was not recognized as a tar file, treat it as non-tar.
+ if ( files.size() == 0 )
+ return fuzzer::UserSuppliedFuzzer::Mutate( Data, Size, MaxSize );
+
+ // This is somewhat `white box' on tar. The tar format uses 512 byte
+ // blocks. One block as header for each file, two empty blocks of 0's
+ // at the end. File data is padded to fill its last block.
+ size_t used_blocks = files.size() + 2;
+ for ( const auto& file : files )
+ used_blocks += ( file.size() + 511 ) / 512;
+
+ size_t max_blocks = MaxSize / 512;
+
+ // If the input is big, it will need to be downsized. If the original
+ // tar file was too big, it may have been clipped to fit. In this
+ // case it may not be possible to properly write out the data, as
+ // there may not be enough space for the trailing two blocks. Start
+ // dropping file data or files from the end.
+ for ( size_t i = files.size();
+ i-- > 1 && used_blocks > max_blocks; )
+ {
+ size_t blocks_to_free = used_blocks - max_blocks;
+ size_t blocks_currently_used_by_file_data =
+ ( files[i].size() + 511 ) / 512;
+
+ if ( blocks_currently_used_by_file_data >= blocks_to_free )
+ {
+ files[i].resize( ( blocks_currently_used_by_file_data -
+ blocks_to_free ) * 512 );
+ used_blocks -= blocks_to_free;
+ continue;
+ }
+
+ files.pop_back();
+ used_blocks -= blocks_currently_used_by_file_data + 1;
+ }
+
+ // If we get down to one file, don't use tar.
+ if ( files.size() == 1 )
+ {
+ memcpy( Data, files[0].data(), files[0].size() );
+ return fuzzer::UserSuppliedFuzzer::Mutate( Data,
+ files[0].size(),
+ MaxSize );
+ }
+
+ size_t free_blocks = max_blocks - used_blocks;
+
+ // Allow each file to use up as much of the currently available space
+ // it can. If it uses or gives up blocks, add them or remove them
+ // from the pool.
+ for ( auto&& file : files )
+ {
+ size_t blocks_currently_used_by_file = ( file.size() + 511 ) / 512;
+ size_t blocks_available = blocks_currently_used_by_file +
+ free_blocks;
+ size_t max_size = blocks_available * 512;
+ size_t data_size = file.size();
+
+ file.resize( max_size );
+ file.resize( fuzzer::UserSuppliedFuzzer::Mutate( file.data(),
+ data_size,
+ max_size ) );
+
+ size_t blocks_now_used_by_file = ( file.size() + 511 ) / 512;
+ free_blocks = free_blocks +
+ blocks_currently_used_by_file -
+ blocks_now_used_by_file;
+ }
+
+ unique_ptr<struct archive,
+ decltype ( archive_write_free )*> a( archive_write_new(),
+ archive_write_free );
+
+ check_result( a.get(), archive_write_add_filter_none( a.get() ) );
+ check_result( a.get(), archive_write_set_format_ustar( a.get() ) );
+
+ // `used' may not be correct until after the archive is closed.
+ size_t used = 0xbadbeef;
+ check_result( a.get(), archive_write_open_memory( a.get(),
+ Data,
+ MaxSize,
+ &used ) );
+
+ {
+ unique_ptr<struct archive,
+ decltype ( archive_write_close )*> a_open( a.get(),
+ archive_write_close );
+
+ int file_index = 0;
+ for ( const auto& file : files )
+ {
+ unique_ptr<struct archive_entry,
+ decltype ( archive_entry_free )*>
+ e( archive_entry_new2( a_open.get() ),
+ archive_entry_free );
+
+ char name_buffer[100];
+ snprintf( name_buffer, 100, "file%d", file_index++ );
+
+ archive_entry_set_pathname( e.get(), name_buffer );
+ archive_entry_set_size( e.get(), file.size() );
+ archive_entry_set_filetype( e.get(), AE_IFREG );
+ archive_entry_set_perm( e.get(), 0644 );
+
+ check_result( a_open.get(),
+ archive_write_header( a_open.get(), e.get() ) );
+ archive_write_data( a_open.get(), file.data(), file.size() );
+ check_result( a_open.get(),
+ archive_write_finish_entry( a_open.get() ) );
+ }
+ }
+
+ return used;
+ }
+
+
+ // Cross `Data1' and `Data2', write up to `MaxOutSize' bytes into `Out',
+ // return the number of bytes written, which should be positive.
+ virtual size_t
+ CrossOver( const uint8_t* Data1,
+ size_t Size1,
+ const uint8_t* Data2,
+ size_t Size2,
+ uint8_t* Out,
+ size_t MaxOutSize )
+ {
+ return fuzzer::UserSuppliedFuzzer::CrossOver( Data1,
+ Size1,
+ Data2,
+ Size2,
+ Out,
+ MaxOutSize );
+ }
+
+ }; // end of FTFuzzer class
+
+
+ int
+ main( int argc,
+ char* *argv )
+ {
+ fuzzer::FuzzerRandomLibc Rand( 0 );
+ FTFuzzer F( &Rand );
+
+ fuzzer::FuzzerDriver( argc, argv, F );
+ }
+
+
+// END