The first line of your data is effectively a header:
=base64begin line=73 size=142698 crc=1e0db1eda49fad0c242c2da2071ea521501a91ad
The rest is base64. After converting that base64 into binary, you end up with some text:
bzip2,7,16813,16573,16672,16636,15710,14413,7264,
... followed by a bzip2 file. I don't know what this "header" data is, but after removing that, the rest can be extracted using bunzip2. The result is an RTF file that contains some images.
Your next step should be to find out more about what is storing the data in the database, and exactly which steps it performs. They appear to be:
- Compress the file
- Add the "header" prefix starting "bzip2"
- Convert the result to base64
- Add another "header" prefix with the CRC and length
- Store the resulting text
You should try to find out precise details of all of these steps so that you can undo them, performing any checks (e.g. CRC checks) along the way.
Here's a complete program that extracts the file from the sample you've given. I've guessed at the format of the "inner" header, but you should really try to find out what's creating it so you can validate my assumptions.
using SharpCompress.Compressors.BZip2;
using System;
using System.Globalization;
using System.IO;
using System.Text;

/// <summary>
/// Extracts a bzip2-compressed document from a text dump consisting of one
/// header line followed by base64 data.
/// </summary>
class Program
{
    /// <summary>
    /// Reads the dump named by <c>args[0]</c>, strips both headers, and writes
    /// the decompressed content to the file named by <c>args[1]</c>.
    /// </summary>
    /// <param name="args">Input path followed by output path.</param>
    static void Main(string[] args)
    {
        if (args == null || args.Length < 2)
        {
            Console.Error.WriteLine("Usage: <input-file> <output-file>");
            Environment.ExitCode = 1;
            return;
        }

        string base64;
        using (var reader = File.OpenText(args[0]))
        {
            // Skip the first line, which has some header information.
            // TODO: Use it instead, to validate the rest of the data.
            reader.ReadLine();
            base64 = reader.ReadToEnd();
        }

        byte[] bytes = Convert.FromBase64String(base64);
        int startOfBody = FindStartOfBody(bytes);

        using (var input = new MemoryStream(bytes, startOfBody, bytes.Length - startOfBody))
        {
            using (var bzip2 = new BZip2Stream(input, SharpCompress.Compressors.CompressionMode.Decompress, true))
            {
                // File.Create truncates an existing file; File.OpenWrite would leave
                // stale trailing bytes behind if the old file was longer.
                using (var output = File.Create(args[1]))
                {
                    bzip2.CopyTo(output);
                }
            }
        }
    }

    /// <summary>
    /// Returns the offset of the first byte after the inner header.
    /// </summary>
    /// <remarks>
    /// The header format is a guess: a sequence of comma-terminated values made up of
    /// a name of some kind (e.g. "bzip2"), the number of further values, and then
    /// those values.
    /// </remarks>
    /// <param name="bytes">The base64-decoded data, starting with the inner header.</param>
    /// <returns>The index in <paramref name="bytes"/> just past the last header value.</returns>
    /// <exception cref="InvalidDataException">The header does not match the assumed format.</exception>
    private static int FindStartOfBody(byte[] bytes)
    {
        int offset = 0;

        // Skip the name.
        GetNextHeaderValue(bytes, ref offset);

        // Find out how many more values there are.
        string valueCountText = GetNextHeaderValue(bytes, ref offset);
        int valueCount;
        // The count is machine-written data, so parse it independently of the current culture.
        if (!int.TryParse(valueCountText, NumberStyles.Integer, CultureInfo.InvariantCulture, out valueCount)
            || valueCount < 0)
        {
            throw new InvalidDataException("Header value count is not a valid non-negative integer: '" + valueCountText + "'.");
        }

        // Skip them.
        for (int i = 0; i < valueCount; i++)
        {
            GetNextHeaderValue(bytes, ref offset);
        }

        // We're now positioned just past the header.
        return offset;
    }

    /// <summary>
    /// Reads the comma-terminated header value starting at <paramref name="offset"/>
    /// and advances <paramref name="offset"/> past the terminating comma.
    /// </summary>
    /// <param name="bytes">The data containing the header.</param>
    /// <param name="offset">Start of the value on entry; index just past its comma on exit.</param>
    /// <returns>The value text, without the comma. Header data is assumed to be ASCII.</returns>
    /// <exception cref="InvalidDataException">No comma is found before the end of the data.</exception>
    private static string GetNextHeaderValue(byte[] bytes, ref int offset)
    {
        int end = Array.IndexOf(bytes, (byte) ',', offset);
        if (end < 0)
        {
            throw new InvalidDataException("Unterminated header value starting at offset " + offset + ".");
        }

        string value = Encoding.ASCII.GetString(bytes, offset, end - offset);

        // Move the offset past the comma.
        offset = end + 1;
        return value;
    }
}