
|
If you were logged in you would be able to see more operations.
|
|
Sesame
Created: 16/Jan/09 08:52 AM
Updated: 22/Oct/09 10:02 PM
|
|
| Component/s: |
Native Sail
|
| Affects Version/s: |
2.0,
2.1,
2.0.1,
2.2,
2.1.1,
2.1.2,
2.1.3,
2.2.1,
2.1.4,
2.2.2,
2.2.3,
2.2.4,
2.3-pr1
|
| Fix Version/s: |
2.3.0
|
|
Issue reported at http://www.openrdf.org/forum/mvnforum/viewthread?thread=1954
Hi everybody,
I might have found a problem in Sesame when concurrently reading and writing data to a native store. What I am doing is the following: writing data to a repository on Thread A, reading this data on Thread B after Thread A committed the data. Sometimes Thread B is not capable of reading the data, while Thread A confirmed the data is present in the store.
I have written some test code which can be used to show this problem:
package sesame;
import java.io.File;
import java.util.HashSet;
import java.util.Set;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.ValueFactory;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.RepositoryResult;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.sail.nativerdf.NativeStore;
/**
 * Test the reading and simultaneous writing of data to a native store.
 * <p />
 * For every number below {@link #NUM_OF_WRITES} a {@link WriteThread} adds one statement to the
 * repository, commits it and closes its connection. It then starts a {@link ReadThread} which
 * tries to read that statement back over a fresh connection. Numbers which were written
 * successfully but could not be read back (exactly once) remain in {@link #WRITTEN_NUMS} and are
 * reported when all threads have finished.
 * <p />
 * Project TestProject<br />
 * ReadWriteTest.java created 13 Jan 2009
 * <p />
 * Copyright © 2009 SemLab
 * @author <a href="mailto:borsje@semlab.nl">J.A. Borsje</a>
 * @version $Revision:$, $Date:$
 */
public class ReadWriteTest
{
    /** The number of statements (and therefore write/read thread pairs) used by the test. */
    private static final int NUM_OF_WRITES = 100000;

    /** Progress is printed every time this many writer threads have been started. */
    private static final int TEN_PERCENT = ReadWriteTest.NUM_OF_WRITES / 10;

    private static Repository repo;

    private static URI TEST_SUBJECT;

    private static URI TEST_PREDICATE;

    private static ValueFactory vf;

    /**
     * The numbers which have been written but not yet read back. Guarded by its own monitor.
     */
    private static final Set<Integer> WRITTEN_NUMS = new HashSet<Integer>();

    /**
     * The test threads which have been created and are not done yet. Guarded by its own monitor,
     * which is also used to signal the main thread when a test thread finishes.
     */
    private static final Set<Thread> THREADS = new HashSet<Thread>();

    /**
     * Run the test.
     * @param args The arguments to this test class, this is not used.
     * @throws RepositoryException Thrown when the repository could not be initialized, cleaned up
     *         or shut down.
     */
    public static void main(String[] args) throws RepositoryException
    {
        // Initialize the repository.
        ReadWriteTest.repo = new SailRepository(new NativeStore(new File("./temp/store"), "spoc,posc"));
        ReadWriteTest.repo.initialize();
        try
        {
            ReadWriteTest.vf = ReadWriteTest.repo.getValueFactory();
            ReadWriteTest.TEST_SUBJECT = ReadWriteTest.vf.createURI("http://semlab.nl/test_subject");
            ReadWriteTest.TEST_PREDICATE = ReadWriteTest.vf.createURI("http://semlab.nl/test_predicate");

            ReadWriteTest.startWriters();
            ReadWriteTest.awaitThreads();
            ReadWriteTest.reportResult();
            ReadWriteTest.cleanUp();
        }
        finally
        {
            // Shut the repository down even when the test itself fails.
            ReadWriteTest.repo.shutDown();
        }
    }

    /**
     * Start one {@link WriteThread} per number and print the progress along the way.
     */
    private static void startWriters()
    {
        for (int i = 0; i < ReadWriteTest.NUM_OF_WRITES; i++)
        {
            try
            {
                new WriteThread(ReadWriteTest.repo.getConnection(), i).start();
            }
            catch (RepositoryException ex)
            {
                System.err.println("Could not get connection: " + ex.getMessage());
            }
            // Print the progress.
            if (i % ReadWriteTest.TEN_PERCENT == 0)
            {
                System.out.println("progress: " + ((double) i / (double) ReadWriteTest.NUM_OF_WRITES));
            }
        }
    }

    /**
     * Block until every registered test thread has called {@link TestThread#done()}.
     */
    private static void awaitThreads()
    {
        // The emptiness check and the wait must happen while holding the same monitor that
        // done() uses. Otherwise the last notification can arrive between the check and the
        // wait, which would leave this thread waiting forever.
        synchronized (ReadWriteTest.THREADS)
        {
            while (!ReadWriteTest.THREADS.isEmpty())
            {
                try
                {
                    ReadWriteTest.THREADS.wait();
                }
                catch (InterruptedException ignored)
                {
                    // Deliberately ignored: the result is only meaningful once all threads
                    // are done, so simply re-check the condition and keep waiting.
                }
            }
        }
    }

    /**
     * Report which written numbers, if any, have not been read back properly.
     */
    private static void reportResult()
    {
        synchronized (ReadWriteTest.WRITTEN_NUMS)
        {
            System.err.println("------------------------------");
            if (ReadWriteTest.WRITTEN_NUMS.isEmpty())
            {
                System.err.println("Test succeeded.");
            }
            else
            {
                System.err.println("There are " + ReadWriteTest.WRITTEN_NUMS.size()
                        + " written numbers which are not read properly!");
                System.err.println("Contents of WRITTEN_NUMS is: " + ReadWriteTest.WRITTEN_NUMS);
            }
            System.err.println("------------------------------");
        }
    }

    /**
     * Remove all statements from the repository and print the remaining number of statements.
     * @throws RepositoryException Thrown when the repository could not be cleared.
     */
    private static void cleanUp() throws RepositoryException
    {
        RepositoryConnection connection = ReadWriteTest.repo.getConnection();
        try
        {
            connection.clear();
            System.err.println("Number of statements after clear: " + connection.size());
        }
        finally
        {
            connection.close();
        }
    }

    // --- The threads which are executed to test everything.

    /**
     * An abstract super class for the test threads which holds a {@link RepositoryConnection}.
     * Every instance registers itself in {@link ReadWriteTest#THREADS} on construction and has
     * to call {@link #done()} when it finishes.
     * @author <a href="mailto:borsje@semlab.nl">J.A. Borsje</a>
     */
    private static abstract class TestThread extends Thread
    {
        protected final RepositoryConnection connection;

        protected final int num;

        /**
         * @param connection A connection to the repository.
         * @param num The number of the test.
         */
        protected TestThread(RepositoryConnection connection, int num)
        {
            this.num = num;
            this.connection = connection;
            synchronized (ReadWriteTest.THREADS)
            {
                ReadWriteTest.THREADS.add(this);
            }
        }

        /**
         * Unregister this thread and wake up the main thread so it can re-check whether all
         * test threads are finished.
         */
        protected void done()
        {
            synchronized (ReadWriteTest.THREADS)
            {
                ReadWriteTest.THREADS.remove(this);
                ReadWriteTest.THREADS.notifyAll();
            }
        }
    }

    /**
     * Write a number to the store and start a {@link ReadThread} to read it back.
     * @author <a href="mailto:borsje@semlab.nl">J.A. Borsje</a>
     */
    private static class WriteThread extends TestThread
    {
        /**
         * @param connection A connection to the repository.
         * @param num The number of the test.
         */
        public WriteThread(RepositoryConnection connection, int num)
        {
            super(connection, num);
        }

        /**
         * @see java.lang.Thread#run()
         */
        @Override
        public void run()
        {
            try
            {
                boolean success;
                try
                {
                    this.connection.add(ReadWriteTest.TEST_SUBJECT, ReadWriteTest.TEST_PREDICATE,
                            ReadWriteTest.vf.createLiteral(this.num));
                    // This is not needed, because we did not mess with the auto commit setting.
                    this.connection.commit();
                    success = this.connection.hasStatement(ReadWriteTest.TEST_SUBJECT,
                            ReadWriteTest.TEST_PREDICATE, ReadWriteTest.vf.createLiteral(this.num), false);
                }
                finally
                {
                    // Always release the connection, also when writing failed.
                    this.connection.close();
                }

                if (success)
                {
                    synchronized (ReadWriteTest.WRITTEN_NUMS)
                    {
                        ReadWriteTest.WRITTEN_NUMS.add(this.num);
                    }
                }
                else
                {
                    System.err.println("Somehow the writing of [" + this.num + "] went wrong!");
                }

                // The reader registers itself in its constructor, i.e. before this thread
                // unregisters below, so THREADS never becomes empty in between.
                new ReadThread(ReadWriteTest.repo.getConnection(), this.num).start();
            }
            catch (RepositoryException ex)
            {
                System.err.println("Could not write to store or get connection: " + ex.getMessage());
            }
            finally
            {
                // Unregister no matter what happened, otherwise the main thread waits forever.
                this.done();
            }
        }
    }

    /**
     * Read a number from the store and, when exactly one matching statement is found, remove
     * the number from {@link ReadWriteTest#WRITTEN_NUMS}.
     * @author <a href="mailto:borsje@semlab.nl">J.A. Borsje</a>
     */
    private static class ReadThread extends TestThread
    {
        /**
         * @param connection A connection to the repository.
         * @param num The number of the test.
         */
        public ReadThread(RepositoryConnection connection, int num)
        {
            super(connection, num);
        }

        /**
         * @see java.lang.Thread#run()
         */
        @Override
        public void run()
        {
            try
            {
                try
                {
                    this.readStatement();
                }
                finally
                {
                    // Always release the connection, also when reading failed.
                    this.connection.close();
                }
            }
            catch (RepositoryException ex)
            {
                System.err.println("Could not read statements: " + ex.getMessage());
            }
            finally
            {
                // Unregister no matter what happened, otherwise the main thread waits forever.
                this.done();
            }
        }

        /**
         * Look up the statement for this thread's number and check that it occurs exactly once.
         * @throws RepositoryException Thrown when the statements could not be read.
         */
        private void readStatement() throws RepositoryException
        {
            RepositoryResult<Statement> result = this.connection.getStatements(ReadWriteTest.TEST_SUBJECT,
                    ReadWriteTest.TEST_PREDICATE, ReadWriteTest.vf.createLiteral(this.num), false);
            try
            {
                if (!result.hasNext())
                {
                    System.err.println("The statement for [" + this.num + "] does not exist.");
                    return;
                }

                result.next();
                if (result.hasNext())
                {
                    System.err.println("There is more then one statement for [" + this.num + "]");
                    return;
                }

                // There is exactly one statement for this num, so remove it from the set.
                synchronized (ReadWriteTest.WRITTEN_NUMS)
                {
                    ReadWriteTest.WRITTEN_NUMS.remove(this.num);
                }
            }
            finally
            {
                result.close();
            }
        }
    }
}
This code writes 100,000 numbers to a store and tries to read them back, using separate threads for reading and writing. When you execute this program you will see that sometimes a value has been written but cannot be read.
I am wondering if this problem is related to SES-527.
|
|
With the NativeStore, this issue is reproducible on Linux machines, but not on Windows (XP) machines. With the MemoryStore, however, the issue (also) affects Windows machines.
The problem in the MemoryStore has been resolved. Some of the ValueFactory-methods that are implemented by MemValueFactory were missing proper synchronization.
The problem was tracked down to a concurrency issue in HashFile. Operations on this class weren't properly synchronized, causing read operations to miss data during rehashing. The data was properly inserted, just not visible at specific points in time during concurrent updates.
|
|