Content Export & Import

Prev Next

Glasswall provides the ability to export and import content items for supported file types. This allows internal components of processed files to be made available to external processes and applications for additional processing outside of the Glasswall Embedded Engine domain. Once exported, these components can be validated externally before the Glasswall Engine imports the components and recomposes the files.

Files must be processed by the Glasswall Embedded Engine twice: a first pass to extract a package containing the components that make up a file (export), and a second pass to reintegrate the externally analysed and/or modified components back into the file (import). See Content Export & Import.

Files can be exported individually from a file path or in memory using the export_file method, or all files from a directory can be exported using the export_directory method.

Examples

Export

Export from file path to file path

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Use the default policy to export a file, writing the export archive to a new path
editor.export_file(
    input_file=r"C:\gwpw\input\TestFile_11.doc",
    output_file=r"C:\gwpw\output\editor\export_f2f\TestFile_11.doc.zip",
)

Export from file path to memory

export_file returns the exported archive file's bytes. The below example demonstrates assigning the variable export_archive and checking the contents of the beginning of an Editor export archive.

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Use the default policy to export a file
export_archive = editor.export_file(
    input_file=r"C:\gwpw\input\TestFile_11.doc",
)

assert export_archive[:8] == b'PK\x03\x04\x14\x00\x0e\x00'

Export from memory

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Read file from disk to memory
with open(r"C:\gwpw\input\TestFile_11.doc", "rb") as f:
    input_bytes = f.read()

# Use the default policy to export a file
export_archive = editor.export_file(
    input_file=input_bytes,
)

assert export_archive[:8] == b'PK\x03\x04\x14\x00\x0e\x00'

Export files in a directory

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Use the default policy to export a directory of files, writing the export archives to a new directory.
editor.export_directory(
    input_directory=r"C:\gwpw\input",
    output_directory=r"C:\gwpw\output\editor\export_directory"
)

Export files in a directory that may contain unsupported file types

The default behaviour of the Glasswall Python wrapper is to raise the relevant exception (see: glasswall.libraries.editor.errors) if processing fails. Passing raise_unsupported=False will prevent an exception being raised and can be useful when working with a directory containing a mixture of both supported and unsupported file types when it is desirable to process as many of the files as possible instead of terminating on the first failure.

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Use the default policy to export a directory of files, writing the export archives to a new directory.
editor.export_directory(
    input_directory=r"C:\gwpw\input_with_unsupported_file_types",
    output_directory=r"C:\gwpw\output\editor\export_directory_unsupported",
    raise_unsupported=False
)

Export files in a directory using a custom content management policy

Using glasswall.content_management.policies.Editor:

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Use a custom Editor policy to export all files in the input directory
# and write them to export_directory_custom directory. Write streams as
# ".xml" instead of the default interchange_type, ".sisl". Export embedded
# images as ".xml" instead of their default image file type.
editor.export_directory(
    input_directory=r"C:\gwpw\input",
    output_directory=r"C:\gwpw\output\editor\export_directory_custom",
    content_management_policy=glasswall.content_management.policies.Editor(
        default="sanitise",
        config={
            "sysConfig": {
                "interchange_type": "xml",
                "export_embedded_images": "true",
            },
        }
    ),
    raise_unsupported=False
)

Export files in a directory conditionally based on file format

The example below demonstrates exporting only .doc and .docx files from a nested directory containing multiple file formats.

import os

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

input_directory = r"C:\gwpw\input"
output_directory = r"C:\gwpw\output\editor\export_directory_file_format"

# Iterate relative file paths from input_directory
for relative_file in glasswall.utils.list_file_paths(input_directory, absolute=False):
    # Construct absolute paths
    input_file = os.path.join(input_directory, relative_file)
    output_file = os.path.join(output_directory, relative_file + ".zip")

    # Get the file type of the file
    file_type = editor.determine_file_type(
        input_file=input_file,
        as_string=True,
        raise_unsupported=False
    )

    # Export only doc and docx files
    if file_type in ["doc", "docx"]:
        editor.export_file(input_file, output_file)


Import

Export archives can be imported individually from a file path or in memory using the import_file method, or all export archives from a directory can be imported using the import_directory method.

Import from file path to file path

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Use the default policy to import an export archive, writing the imported file to a new path
editor.import_file(
    input_file=r"C:\gwpw\output\editor\export_f2f\TestFile_11.doc.zip",
    output_file=r"C:\gwpw\output\editor\import_f2f\TestFile_11.doc",
)

Import from file path to memory

import_file returns the imported file's bytes. The below example demonstrates assigning the variable file_bytes and checking the contents of the beginning of the imported file, which for a .doc file is the OLE compound file signature.

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Use the default policy to import an export archive
file_bytes = editor.import_file(
    input_file=r"C:\gwpw\output\editor\export_f2f\TestFile_11.doc.zip",
)

assert file_bytes[:8] == b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1'

Import from memory

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Read file from disk to memory
with open(r"C:\gwpw\output\editor\export_f2f\TestFile_11.doc.zip", "rb") as f:
    export_archive_bytes = f.read()

# Use the default policy to import an export archive
file_bytes = editor.import_file(
    input_file=export_archive_bytes,
)

assert file_bytes[:8] == b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1'

Import files in a directory

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Use the default policy to import a directory of export archives, writing the imported files to a new directory.
editor.import_directory(
    input_directory=r"C:\gwpw\output\editor\export_directory",
    output_directory=r"C:\gwpw\output\editor\import_directory"
)

Import files in a directory that may contain unsupported file types

The default behaviour of the Glasswall Python wrapper is to raise the relevant exception (see: glasswall.libraries.editor.errors) if processing fails. Passing raise_unsupported=False will prevent an exception being raised and can be useful when working with a directory containing a mixture of both supported and unsupported file types when it is desirable to process as many of the files as possible instead of terminating on the first failure.

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Use the default policy to import a directory of export archives, writing the imported files to a new directory.
editor.import_directory(
    input_directory=r"C:\gwpw\output\editor\export_directory_unsupported",
    output_directory=r"C:\gwpw\output\editor\import_directory_unsupported",
    raise_unsupported=False
)

Import files in a directory using a custom content management policy

Using glasswall.content_management.policies.Editor:

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Use a custom Editor policy to import all files in the export directory
# and write them to import_directory_custom directory. Read streams as
# ".xml" instead of the default interchange_type, ".sisl".
editor.import_directory(
    input_directory=r"C:\gwpw\output\editor\export_directory_custom",
    output_directory=r"C:\gwpw\output\editor\import_directory_custom",
    content_management_policy=glasswall.content_management.policies.Editor(
        default="sanitise",
        config={
            "sysConfig": {
                "interchange_type": "xml",
            },
        }
    ),
    raise_unsupported=False
)

Import files in a directory conditionally based on file format

The example below demonstrates importing only the export archives of .doc and .docx files (file names ending in doc.zip or docx.zip) from a nested directory containing multiple file formats.

import os

import glasswall


# Load the Glasswall Editor library
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

input_directory = r"C:\gwpw\output\editor\export_directory_file_format"
output_directory = r"C:\gwpw\output\editor\import_directory_file_format"

# Iterate relative file paths from input_directory
for relative_file in glasswall.utils.list_file_paths(input_directory, absolute=False):
    # Construct absolute paths
    input_file = os.path.join(input_directory, relative_file)
    output_file = os.path.join(output_directory, os.path.splitext(relative_file)[0])

    # Get the file type of the file
    file_type = editor.determine_file_type(
        input_file=input_file,
        as_string=True,
        raise_unsupported=False
    )

    # Import only doc.zip and docx.zip files
    if file_type == "zip" and input_file.endswith(("doc.zip", "docx.zip",)):
        editor.import_file(input_file, output_file)


Export/Import & Analyse

These high-level functions let you run Export or Import and generate an analysis report within a single session. For more information, see the documentation links below.


API Documentation

https://glasswall-python-wrapper-documentation.glasswall.com/