/*
WebMerge.java
This is an example of a complete, simple Java application that may even be
useful. Its purpose is to merge a set of HTML files (World-Wide Web
documents) so that a consistent style is applied to all of them.
This program does not have a graphical user interface; it is so simple that
it does not need one. It is designed to be run from the command prompt, by
typing a command something like
    java WebMerge mysite.html page1.html page2.html page3.html...
When used this way, the file `mysite.html' will be treated as the `template'
file. This file will form the background for all the other files. Somewhere
in this file should be the string `%%CONTENT%%'. This string will be replaced
by each of the other web pages `page1.html', `page2.html',... In the process
a new set of files will be written, in a directory whose name is the same
as the template file (without its `.html' suffix). So the example above will
write a new set of files called `page1.html' etc, in a directory called
`mysite'.
If the specified pages have their own specifications of background colour,
or background image, this information is removed, and replaced with whatever
was specified in the template. In other words, the program looks for the
`body' section of each HTML file and extracts it.
This program demonstrates two simple methods for managing data files in
Java, both using the `FileInputStream' and `FileOutputStream' classes.
In the operation `readFile', the data file is read into a string one
character at a time. In `writeFile' a whole block of data is written
in one go. Note that FileInputStream and FileOutputStream allow only
these simple operations on files, that is, reading and writing fixed
numbers of bytes of data. For more sophisticated file processing, one
might prefer to use classes like DataInputStream.
Kevin Boone, June 1999
*/
import java.io.*;
public class WebMerge
{
    /** Placeholder in the template that is replaced by the body of each page. */
    private static final String CONTENT_TAG = "%%CONTENT%%";

    /** Suffix that is searched for in the template filename to derive the directory name. */
    private static final String HTML_SUFFIX = ".html";

    /**
     * Charset that maps every byte to exactly one character and back again.
     * `readFile' turns each byte into one char, so `writeFile' must use this
     * charset to put the same bytes back on disk, whatever the platform
     * default encoding happens to be.
     */
    private static final String BYTE_CHARSET = "ISO-8859-1";

    /**
     * Program entry point. Reads the template named by the first argument,
     * creates (if necessary) a directory named after the template, and merges
     * every remaining argument into the template, writing the results into
     * that directory. Any fatal problem is reported on standard error and
     * the program exits with status -1.
     *
     * @param args args[0] is the template file; args[1..] are the HTML files
     *             to merge into it
     */
    public static void main (String args[])
    {
        // `args' is an array of strings that corresponds to the information
        // the user specified on the command line. `args[0]' is the first item
        // on the command line, `args[1]' is the second, and so on. In this
        // program the user must specify at least two things: the template
        // file and one file to process. So args.length must be at least 2
        // to continue.
        if (args.length < 2)
        {
            System.err.println
                ("You are using this program incorrectly. You must specify the");
            System.err.println
                ("template HTML file and the input HTML files on the command line");
            System.exit(-1);
        }

        // In Java, a File object represents the name of a file or directory.
        // When we need to read or write a file, the FileInputStream and
        // FileOutputStream classes can be given a File object that
        // represents the file to process.
        File templateFile = new File(args[0]);

        /// QUESTION: what does it mean if `canRead' returns the value `false'?
        if (!templateFile.canRead())
        {
            // This file can't be read: no point in carrying on
            System.err.println("Can't read template file `" + templateFile + "'");
            System.exit(-1);
        }

        // The name for the new directory is given by the filename of the
        // template file, with everything from the last `.html' onwards
        // stripped off. So we look for `.html' in the filename, and if it is
        // there we make the directory name from what precedes it.
        int htmlSuffixPosition = templateFile.toString().lastIndexOf(HTML_SUFFIX);
        if (htmlSuffixPosition < 0)
        {
            // The name of the template file does not contain `.html'
            System.err.println("Filename `" + templateFile
                + "' does not end in `.html'");
            System.exit(-1);
        }

        /// QUESTION: what would happen if the user specified a filename like
        /// `fred.html.xyz'? Would the program work, and what would the new
        /// directory end up being called?
        String directoryName =
            templateFile.toString().substring(0, htmlSuffixPosition);

        // If the directory has been created already, we don't need to do
        // anything (but warn the user). If it hasn't, create it now.
        // Note that Java uses the same class `File' to represent the name of
        // either a file, or a directory.
        /// QUESTION: is it possible to construct a new File object so that
        /// its name corresponds to a file that does not exist?
        File directory = new File(directoryName);
        if (directory.isDirectory())
        {
            System.out.println("Directory `" + directoryName + "' already exists");
        }
        else
        {
            // Create the directory
            if (directory.mkdir())
            {
                System.out.println("`" + directoryName + "' created");
            }
            else
            {
                // The directory is missing and we were unable to create it
                System.err.println("Directory `" + directoryName
                    + "' did not exist and could not be created");
                System.exit(-1);
            }
        }

        // As we will use the data in the template many times (if the user
        // specifies many files on the command line), we read the data from
        // that file into a String at this stage, and then work with the
        // String. This means we only have to read the template file once.
        String templateString = readFile(templateFile);

        // So now we will process the user-specified files, one at a time.
        // The operation `processFile' is called to do this.
        for (int i = 1; i < args.length; i++)
        {
            String inputFilename = args[i];
            /// QUESTION: what does the line below do?
            String outputFilename = directoryName + "/" + inputFilename;
            processFile (templateString, inputFilename, outputFilename);
        }
    }

    /**
     * Takes the template text and one input file, and merges the two into a
     * new file. The body of the input page replaces the first occurrence of
     * `%%CONTENT%%' in the template. Problems are reported on standard error
     * and cause the file to be skipped; they do not stop the program (except
     * for a read failure inside `readFile', which exits).
     *
     * @param templateString a String containing the text of the template file
     * @param inputFilename  the HTML file to read
     * @param outputFilename the new file to write
     */
    public static void processFile (String templateString, String inputFilename,
                                    String outputFilename)
    {
        System.out.println ("processing file `" + inputFilename
            + "' to file `" + outputFilename + "'");

        File inputFile = new File(inputFilename);
        File outputFile = new File(outputFilename);

        if (!inputFile.canRead())
        {
            // This file can't be read: no point in carrying on
            System.err.println("Can't read input file `" + inputFilename + "'");
            return;
        }

        // Get the text of the input file into a string for processing
        /// QUESTION: suppose the input file were several megabytes long.
        /// Do you think that reading the whole file into a string would
        /// be sensible?
        String inputString = readFile(inputFile);

        // Later we will want to write the new file to disk. Before we try
        // this, we need to ensure that the file can be written. So if
        // there is already a file with the same name, check that it
        // can be written.
        if (outputFile.exists() && !outputFile.canWrite())
        {
            // This file can't be written: no point in carrying on
            System.err.println("Can't write output file `" + outputFilename + "'");
            return;
        }

        // At this point, we have the input file in a String and we think the
        // output file can probably be written. Now we extract the `body' of
        // the Web page.
        String pageBody = extractBody(inputString);

        // Now we look for the tag `%%CONTENT%%' in the template, and replace
        // this tag with the complete Web page body.
        int templateTagPos = templateString.indexOf (CONTENT_TAG);
        if (templateTagPos < 0)
        {
            System.err.println("The template file does not contain the text");
            System.err.println("`%%CONTENT%%', so the Web page can't be merged in");
            return;
        }

        // Make a new string that contains the template data up to
        // `%%CONTENT%%', plus the `body' of the input file, plus the
        // remainder of the template file.
        String mergedPage = templateString.substring(0, templateTagPos)
            + pageBody
            + templateString.substring(templateTagPos + CONTENT_TAG.length());

        // Write this file to disk, and the job's done.
        if (!writeFile (outputFile, mergedPage))
        {
            System.err.println("Can't write file `" + outputFilename + "'");
        }
    }

    /**
     * Extracts the part of an HTML page that lies between the opening
     * {@code <body ...>} tag and the closing {@code </body} tag. Tag names are
     * matched without regard to case. If the opening tag is missing the text
     * is taken from the start of the page; if the closing tag is missing the
     * text runs to the end of the page.
     *
     * @param html the complete text of the page
     * @return the text of the page body
     */
    private static String extractBody(String html)
    {
        String body = html;

        int openTagPos = indexOfIgnoreCase(body, "<body");
        if (openTagPos >= 0)
        {
            // Skip the `<', then skip the rest of the tag (which may carry
            // attributes such as a background colour) up to its closing `>'.
            body = body.substring(openTagPos + 1);
            int openTagEnd = body.indexOf('>');
            if (openTagEnd >= 0)
            {
                body = body.substring(openTagEnd + 1);
            }
        }

        int closeTagPos = indexOfIgnoreCase(body, "</body");
        if (closeTagPos >= 0)
        {
            body = body.substring(0, closeTagPos);
        }
        return body;
    }

    /**
     * Finds the first occurrence of `target' in `text', ignoring case.
     *
     * @param text   the string to search
     * @param target the string to look for
     * @return the index of the first match, or -1 if there is none
     */
    private static int indexOfIgnoreCase(String text, String target)
    {
        int lastStart = text.length() - target.length();
        for (int i = 0; i <= lastStart; i++)
        {
            if (text.regionMatches(true, i, target, 0, target.length()))
            {
                return i;
            }
        }
        return -1;
    }

    /**
     * Reads a file and returns its contents as a string, one character per
     * byte. If there is any problem, it stops the program. This is very
     * ugly, but adequate for this simple program. The caller of this
     * operation should check that the file is readable before calling.
     *
     * @param file the File object that contains the name of the file to read
     * @return a string whose characters correspond to the bytes in the file
     */
    public static String readFile(File file)
    {
        StringBuffer contents = new StringBuffer();
        try
        {
            FileInputStream fileStream = new FileInputStream(file.toString());
            try
            {
                // The file is still consumed one byte at a time, but through
                // a buffer so that each read() does not hit the disk. We read
                // until read() reports end-of-file (-1) rather than asking
                // available(), which only estimates how many bytes can be
                // read without blocking and is not an end-of-file test.
                InputStream inputStream = new BufferedInputStream(fileStream);
                int nextByte;
                while ((nextByte = inputStream.read()) != -1)
                {
                    contents.append((char) nextByte);
                }
            }
            finally
            {
                // Always release the file handle, even if reading failed
                fileStream.close();
            }
        }
        catch (IOException e)
        {
            // Covers FileNotFoundException as well, which is an IOException
            System.err.println(e.toString());
            System.exit(-1);
        }
        return contents.toString();
    }

    /**
     * Writes the specified string of text to the specified file in one go.
     * Each character is written as a single byte, which is the exact inverse
     * of `readFile'; characters above 255 cannot be represented and are
     * replaced by the encoder.
     *
     * @param file the File object that contains the name of the file to write
     * @param text a string containing the text to write to the file
     * @return `true' if the file was written successfully, or `false'
     *         otherwise
     */
    public static boolean writeFile(File file, String text)
    {
        try
        {
            FileOutputStream outputStream = new FileOutputStream(file.toString());
            try
            {
                byte textBytes[] = text.getBytes(BYTE_CHARSET);
                outputStream.write(textBytes, 0, textBytes.length);
            }
            finally
            {
                // Closing can itself fail (e.g. disk full); that is reported
                // to the caller as an unsuccessful write.
                outputStream.close();
            }
        }
        catch (IOException e)
        {
            return false;
        }
        return true;
    }
}