Content Export & Import
    • PDF

    Content Export & Import

    • PDF

    Article summary

    Glasswall provides the ability to export and import content items for supported file types. This allows internal components of processed files to be made available to external processes and applications for additional processing outside of the Glasswall Embedded Engine domain. Once exported, these components can be validated externally before the Glasswall Engine imports the components and recomposes the files.

    Files must be processed by the Glasswall Embedded Engine twice: once to extract a package containing the components that make up a file (export), and a second time to reintegrate the externally analysed and/or modified components back into the file (import). See Content Export & Import.

    Files can be exported individually from a file path or in memory using the export_file method, or all files from a directory can be exported using the export_directory method.

    Examples

    Export

    Export from file path to file path

    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    # Use the default policy to export a file, writing the export archive to a new path
    editor.export_file(
        input_file=r"C:\gwpw\input\TestFile_11.doc",
        output_file=r"C:\gwpw\output\editor\export_f2f\TestFile_11.doc.zip",
    )
    
    

    Export from file path to memory

    export_file returns the exported archive file's bytes. The example below demonstrates assigning the result to the variable export_archive and checking the first bytes of the Editor export archive.

    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    # Use the default policy to export a file
    export_archive = editor.export_file(
        input_file=r"C:\gwpw\input\TestFile_11.doc",
    )
    
    assert export_archive[:8] == b'PK\x03\x04\x14\x00\x0e\x00'
    
    

    Export from memory

    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    # Read file from disk to memory
    with open(r"C:\gwpw\input\TestFile_11.doc", "rb") as f:
        input_bytes = f.read()
    
    # Use the default policy to export a file
    export_archive = editor.export_file(
        input_file=input_bytes,
    )
    
    assert export_archive[:8] == b'PK\x03\x04\x14\x00\x0e\x00'
    
    

    Export files in a directory

    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    # Use the default policy to export a directory of files, writing the export archives to a new directory.
    editor.export_directory(
        input_directory=r"C:\gwpw\input",
        output_directory=r"C:\gwpw\output\editor\export_directory"
    )
    
    

    Export files in a directory that may contain unsupported file types

    The default behaviour of the Glasswall Python wrapper is to raise the relevant exception (see: glasswall.libraries.editor.errors) if processing fails. Passing raise_unsupported=False prevents the exception from being raised. This is useful when working with a directory containing a mixture of supported and unsupported file types, where it is desirable to process as many of the files as possible instead of terminating on the first failure.

    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    # Use the default policy to export a directory of files, writing the export archives to a new directory.
    editor.export_directory(
        input_directory=r"C:\gwpw\input_with_unsupported_file_types",
        output_directory=r"C:\gwpw\output\editor\export_directory_unsupported",
        raise_unsupported=False
    )
    
    

    Export files in a directory using a custom content management policy

    Using glasswall.content_management.policies.Editor:

    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    # Use a custom Editor policy to export all files in the input directory
    # and write them to export_directory_custom directory. Write streams as
    # ".xml" instead of the default interchange_type, ".sisl". Export embedded
    # images as ".xml" instead of their default image file type.
    editor.export_directory(
        input_directory=r"C:\gwpw\input",
        output_directory=r"C:\gwpw\output\editor\export_directory_custom",
        content_management_policy=glasswall.content_management.policies.Editor(
            default="sanitise",
            config={
                "sysConfig": {
                    "interchange_type": "xml",
                    "export_embedded_images": "true",
                },
            }
        ),
        raise_unsupported=False
    )
    
    

    Export files in a directory conditionally based on file format

    The example below demonstrates processing only .doc and .docx files from a nested directory containing multiple file formats.

    import os
    
    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    input_directory = r"C:\gwpw\input"
    output_directory = r"C:\gwpw\output\editor\export_directory_file_format"
    
    # Iterate relative file paths from input_directory
    for relative_file in glasswall.utils.list_file_paths(input_directory, absolute=False):
        # Construct absolute paths
        input_file = os.path.join(input_directory, relative_file)
        output_file = os.path.join(output_directory, relative_file + ".zip")
    
        # Get the file type of the file
        file_type = editor.determine_file_type(
            input_file=input_file,
            as_string=True,
            raise_unsupported=False
        )
    
        # Export only doc and docx files
        if file_type in ["doc", "docx"]:
            editor.export_file(input_file, output_file)
    
    

    Import

    Export archives can be imported individually from a file path or in memory using the import_file method, or all export archives from a directory can be imported using the import_directory method.

    Import from file path to file path

    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    # Use the default policy to import an export archive, writing the imported file to a new path
    editor.import_file(
        input_file=r"C:\gwpw\output\editor\export_f2f\TestFile_11.doc.zip",
        output_file=r"C:\gwpw\output\editor\import_f2f\TestFile_11.doc",
    )
    
    

    Import from file path to memory

    import_file returns the imported file's bytes. The example below demonstrates assigning the result to the variable file_bytes and checking the first bytes of the imported file.

    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    # Use the default policy to import an export archive
    file_bytes = editor.import_file(
        input_file=r"C:\gwpw\output\editor\export_f2f\TestFile_11.doc.zip",
    )
    
    assert file_bytes[:8] == b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1'
    
    

    Import from memory

    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    # Read file from disk to memory
    with open(r"C:\gwpw\output\editor\export_f2f\TestFile_11.doc.zip", "rb") as f:
        export_archive_bytes = f.read()
    
    # Use the default policy to import an export archive
    file_bytes = editor.import_file(
        input_file=export_archive_bytes,
    )
    
    assert file_bytes[:8] == b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1'
    
    

    Import files in a directory

    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    # Use the default policy to import a directory of export archives, writing the imported files to a new directory.
    editor.import_directory(
        input_directory=r"C:\gwpw\output\editor\export_directory",
        output_directory=r"C:\gwpw\output\editor\import_directory"
    )
    
    

    Import files in a directory that may contain unsupported file types

    The default behaviour of the Glasswall Python wrapper is to raise the relevant exception (see: glasswall.libraries.editor.errors) if processing fails. Passing raise_unsupported=False prevents the exception from being raised. This is useful when working with a directory containing a mixture of supported and unsupported file types, where it is desirable to process as many of the files as possible instead of terminating on the first failure.

    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    # Use the default policy to import a directory of export archives, writing the imported files to a new directory.
    editor.import_directory(
        input_directory=r"C:\gwpw\output\editor\export_directory_unsupported",
        output_directory=r"C:\gwpw\output\editor\import_directory_unsupported",
        raise_unsupported=False
    )
    
    

    Import files in a directory using a custom content management policy

    Using glasswall.content_management.policies.Editor:

    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    # Use a custom Editor policy to import all files in the export directory
    # and write them to import_directory_custom directory. Read streams as
    # ".xml" instead of the default interchange_type, ".sisl".
    editor.import_directory(
        input_directory=r"C:\gwpw\output\editor\export_directory_custom",
        output_directory=r"C:\gwpw\output\editor\import_directory_custom",
        content_management_policy=glasswall.content_management.policies.Editor(
            default="sanitise",
            config={
                "sysConfig": {
                    "interchange_type": "xml",
                },
            }
        ),
        raise_unsupported=False
    )
    
    

    Import files in a directory conditionally based on file format

    The example below demonstrates importing only the export archives of .doc and .docx files (.doc.zip and .docx.zip) from a nested directory containing multiple file formats.

    import os
    
    import glasswall
    
    
    # Load the Glasswall Editor library
    editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")
    
    input_directory = r"C:\gwpw\output\editor\export_directory_file_format"
    output_directory = r"C:\gwpw\output\editor\import_directory_file_format"
    
    # Iterate relative file paths from input_directory
    for relative_file in glasswall.utils.list_file_paths(input_directory, absolute=False):
        # Construct absolute paths
        input_file = os.path.join(input_directory, relative_file)
        output_file = os.path.join(output_directory, os.path.splitext(relative_file)[0])
    
        # Get the file type of the file
        file_type = editor.determine_file_type(
            input_file=input_file,
            as_string=True,
            raise_unsupported=False
        )
    
        # Import only doc.zip and docx.zip files
        if file_type == "zip" and input_file.endswith(("doc.zip", "docx.zip",)):
            editor.import_file(input_file, output_file)
    
    

    Export and Analyse

    These high-level functions let you run Export together with Analysis within a single session. For more information, see the documentation links below.


    API Documentation

    https://glasswall-python-wrapper-documentation.glasswall.com/


    Was this article helpful?