public class KAFDocument extends Object implements Serializable
Modifier and Type | Class and Description |
---|---|
class |
KAFDocument.FileDesc |
static class |
KAFDocument.Layer |
class |
KAFDocument.Public |
Constructor and Description |
---|
KAFDocument(String lang,
String version)
Creates an empty KAFDocument element
|
Modifier and Type | Method and Description |
---|---|
void |
addConstituencyFromParentheses(String parseOut) |
void |
addConstituencyFromParentheses(String parseOut,
Integer sentence) |
void |
addConstituencyString(String constituencyString,
Integer sent) |
LinguisticProcessor |
addLinguisticProcessor(String layer,
LinguisticProcessor linguisticProcessor) |
LinguisticProcessor |
addLinguisticProcessor(String layer,
String name)
Adds a linguistic processor to the document header.
|
LinguisticProcessor |
addLinguisticProcessor(String layer,
String name,
String version)
Deprecated
|
LinguisticProcessor |
addLinguisticProcessor(String layer,
String name,
String timestamp,
String version)
Deprecated
|
void |
addLinguisticProcessors(Map<String,List<LinguisticProcessor>> lps) |
Feature |
createCategory(String lemma,
List<List<Term>> references)
Deprecated
|
Feature |
createCategory(String id,
String lemma,
List<List<Term>> references)
Deprecated
|
Chunk |
createChunk(String id,
Term head,
String phrase,
List<Term> terms)
Deprecated
|
Chunk |
createChunk(Term head,
String phrase,
List<Term> terms)
Deprecated
|
Coref |
createCoref(List<List<Target>> references)
Deprecated
|
Coref |
createCoref(String id,
List<List<Target>> references)
Deprecated
|
Dep |
createDep(Term from,
Term to,
String rfunc)
Deprecated
|
Entity |
createEntity(String type,
List<List<Term>> references)
Deprecated
|
Entity |
createEntity(String id,
String type,
List<List<Term>> references)
Deprecated
|
ExternalRef |
createExternalRef(String resource,
String reference)
Deprecated
|
KAFDocument.FileDesc |
createFileDesc() |
static KAFDocument |
createFromFile(File file)
Creates a new KAFDocument and loads the contents of the file passed as argument
|
static KAFDocument |
createFromStream(Reader stream)
Creates a new KAFDocument, loading its content from the reader given as argument.
|
Opinion |
createOpinion()
Deprecated
|
Opinion |
createOpinion(String id)
Deprecated
|
Feature |
createProperty(String lemma,
List<List<Term>> references)
Deprecated
|
Feature |
createProperty(String id,
String lemma,
List<List<Term>> references)
Deprecated
|
KAFDocument.Public |
createPublic() |
Relation |
createRelation(Relational from,
Relational to)
Deprecated
|
Relation |
createRelation(String id,
Relational from,
Relational to)
Deprecated
|
Term.Sentiment |
createSentiment()
Deprecated
|
static Target |
createTarget(Term term)
Deprecated.
|
static Target |
createTarget(Term term,
boolean isHead)
Deprecated.
|
Term |
createTerm(String type,
String lemma,
String pos,
List<WF> wfs)
Deprecated
|
Term |
createTerm(String id,
String type,
String lemma,
String pos,
List<WF> wfs)
Deprecated
|
Term |
createTermOptions(String type,
String lemma,
String pos,
String morphofeat,
List<WF> wfs)
Deprecated
|
WF |
createWF(String form)
Deprecated
|
WF |
createWF(String form,
int offset)
Deprecated
|
WF |
createWF(String id,
String form)
Deprecated
|
List<Feature> |
getCategories()
Returns a list with all categories in the document
|
List<Chunk> |
getChunks() |
List<Chunk> |
getChunksByPara(Integer para) |
List<Chunk> |
getChunksBySent(Integer sent) |
List<CLink> |
getCLinks() |
List<Tree> |
getConstituents() |
List<Tree> |
getConstituentsBySent(Integer sent) |
List<Coref> |
getCorefs() |
List<Coref> |
getCorefsByTerm(Term term) |
List<Dep> |
getDepPath(Term from,
Term to) |
List<Dep> |
getDeps() |
List<Dep> |
getDepsByPara(Integer para) |
List<Dep> |
getDepsBySent(Integer sent) |
List<Dep> |
getDepsByTerm(Term term) |
List<Dep> |
getDepsFromTerm(Term term) |
Dep |
getDepToTerm(Term term) |
List<Entity> |
getEntities()
Returns a list with all entities in the document
|
List<Entity> |
getEntitiesByPara(Integer para) |
List<Entity> |
getEntitiesBySent(Integer sent) |
List<Entity> |
getEntitiesByTerm(Term term) |
List<Factuality> |
getFactualities() |
KAFDocument.FileDesc |
getFileDesc() |
Integer |
getFirstParagraph() |
Integer |
getFirstSentence() |
String |
getLang()
Returns the language of the processed document
|
Map<String,List<LinguisticProcessor>> |
getLinguisticProcessors()
Returns a hash of linguistic processors from the document.
|
List<LinkedEntity> |
getLinkedEntities() |
List<Mark> |
getMarks(String source) |
List<String> |
getMarkSources() |
Integer |
getNumParagraphs() |
Integer |
getNumSentences() |
List<Opinion> |
getOpinions() |
List<Opinion> |
getOpinions(String label) |
List<Predicate> |
getPredicates() |
List<Predicate> |
getPredicatesByPara(Integer para) |
List<Predicate> |
getPredicatesBySent(Integer sent) |
List<Predicate> |
getPredicatesByTerm(Term term) |
List<Feature> |
getProperties()
Returns a list with all properties in the document
|
KAFDocument.Public |
getPublic() |
String |
getRawText()
Returns the raw text
|
List<Relation> |
getRelations()
Returns a list with all relations in the document
|
List<List<WF>> |
getSentences()
Returns a list with all sentences.
|
List<Term> |
getSentenceTerms(int sent) |
List<Integer> |
getSentsByParagraph(Integer para) |
List<Term> |
getTerms()
Returns a list with all terms in the document.
|
Set<Term> |
getTermsByDepAncestors(Iterable<Term> ancestors) |
Set<Term> |
getTermsByDepAncestors(Iterable<Term> ancestors,
String pattern) |
Set<Term> |
getTermsByDepDescendants(Iterable<Term> descendents) |
Set<Term> |
getTermsByDepDescendants(Iterable<Term> descendents,
String pattern) |
List<Term> |
getTermsByPara(Integer para) |
List<Term> |
getTermsBySent(Integer sent) |
List<Term> |
getTermsByWFs(List<WF> wfs)
Returns a list of terms containing the word forms given as argument.
|
List<Term> |
getTermsFromWFs(List<String> wfIds)
Deprecated.
|
Term |
getTermsHead(Iterable<Term> descendents) |
List<Timex3> |
getTimeExs() |
List<Timex3> |
getTimeExsBySent(Integer sent) |
List<Timex3> |
getTimeExsByTerm(Term term) |
List<Timex3> |
getTimeExsByWF(WF wf) |
List<TLink> |
getTLinks() |
List<org.jdom2.Element> |
getUnknownLayers() |
String |
getVersion()
Returns the KAF version
|
List<WF> |
getWFs()
Returns a list containing all WFs in the document
|
List<WF> |
getWFsByPara(Integer para) |
List<WF> |
getWFsBySent(Integer sent) |
String |
insertCategory(Feature category) |
String |
insertChunk(Chunk chunk) |
String |
insertCoref(Coref coref) |
void |
insertDep(Dep dep) |
String |
insertEntity(Entity entity) |
String |
insertOpinion(Opinion opinion) |
String |
insertProperty(Feature property) |
String |
insertRelation(Relation relation) |
String |
insertTerm(Term term) |
String |
insertTimex3(Timex3 timex3) |
String |
insertWF(WF wf) |
void |
join(KAFDocument doc)
Joins the document with another one.
|
boolean |
linguisticProcessorExists(String layer,
String name)
Returns whether the given linguistic processor is already defined or not.
|
boolean |
linguisticProcessorExists(String layer,
String name,
String version)
Returns whether the given linguistic processor is already defined or not.
|
static void |
main(String[] args) |
boolean |
matchDepPath(Term from,
Iterable<Dep> path,
String pattern) |
Feature |
newCategory(String lemma,
List<Span<Term>> references)
Creates a new category.
|
Feature |
newCategory(String id,
String lemma,
List<Span<Term>> references)
Creates a new category.
|
Chunk |
newChunk(String phrase,
Span<Term> span)
Creates a new chunk.
|
Chunk |
newChunk(String id,
String phrase,
Span<Term> span)
Creates a chunk object to load an existing chunk.
|
CLink |
newCLink(Predicate from,
Predicate to) |
CLink |
newCLink(String id,
Predicate from,
Predicate to) |
Term |
newCompound(List<Term> terms,
String lemma) |
Tree |
newConstituent(TreeNode root) |
Tree |
newConstituent(TreeNode root,
Integer sentence) |
Coref |
newCoref(List<Span<Term>> mentions)
Creates a new coreference.
|
Coref |
newCoref(String id,
List<Span<Term>> mentions)
Creates a coreference object to load an existing Coref.
|
Dep |
newDep(Term from,
Term to,
String rfunc)
Creates a new dependency.
|
Entity |
newEntity(List<Span<Term>> references)
Creates a new Entity.
|
Entity |
newEntity(String id,
List<Span<Term>> references)
Creates an Entity object to load an existing entity.
|
ExternalRef |
newExternalRef(String resource,
String reference)
Creates a new external reference.
|
Factuality |
newFactuality(Term term)
Creates a Factuality object and adds it to the document
|
LinkedEntity |
newLinkedEntity(Span<WF> span)
Creates a LinkedEntity object and adds it to the document
|
LinkedEntity |
newLinkedEntity(String id,
Span<WF> span)
Creates a LinkedEntity object and adds it to the document, using the supplied ID.
|
Mark |
newMark(String source,
Span<Term> span) |
Mark |
newMark(String id,
String source,
Span<Term> span) |
NonTerminal |
newNonTerminal(String label) |
NonTerminal |
newNonTerminal(String id,
String label) |
Opinion |
newOpinion()
Creates a new opinion object.
|
Opinion |
newOpinion(String id)
Creates a new opinion object.
|
Predicate |
newPredicate(Span<Term> span)
Creates a new srl predicate.
|
Predicate |
newPredicate(String id,
Span<Term> span)
Creates a new srl predicate.
|
Feature |
newProperty(String lemma,
List<Span<Term>> references)
Creates a new property.
|
Feature |
newProperty(String id,
String lemma,
List<Span<Term>> references)
Creates a new property.
|
Relation |
newRelation(Relational from,
Relational to)
Creates a new relation between entities and/or sentiment features.
|
Relation |
newRelation(String id,
Relational from,
Relational to)
Creates a new relation between entities and/or sentiment features.
|
Predicate.Role |
newRole(Predicate predicate,
String semRole,
Span<Term> span)
Creates a new Role object.
|
Predicate.Role |
newRole(String id,
Predicate predicate,
String semRole,
Span<Term> span)
Creates a Role object to load an existing role.
|
Term.Sentiment |
newSentiment()
Creates a Sentiment object.
|
SSTspan |
newSST(Span<Term> span)
Creates an SSTspan object and adds it to the document
|
SSTspan |
newSST(Span<Term> span,
String type,
String label) |
Term |
newTerm(Span<WF> span)
Creates a new Term.
|
Term |
newTerm(String id,
Span<WF> span)
Creates a Term object to load an existing term.
|
Term |
newTerm(String id,
Span<WF> span,
boolean isComponent) |
Term |
newTerm(String id,
Span<WF> span,
Integer position) |
Term |
newTerm(String type,
String lemma,
String pos,
Span<WF> span)
Deprecated
|
Term |
newTerm(String id,
String type,
String lemma,
String pos,
Span<WF> span)
Deprecated
|
Terminal |
newTerminal(Span<Term> span) |
Terminal |
newTerminal(String id,
Span<Term> span) |
Term |
newTermOptions(String morphofeat,
Span<WF> span)
Creates a new Term.
|
Term |
newTermOptions(String type,
String lemma,
String pos,
String morphofeat,
Span<WF> span)
Deprecated
|
static Span<Term> |
newTermSpan() |
static Span<Term> |
newTermSpan(List<Term> targets) |
static Span<Term> |
newTermSpan(List<Term> targets,
Term head) |
Timex3 |
newTimex3(Span<WF> mentions,
String type)
Creates a new time expression.
|
Timex3 |
newTimex3(String type)
Creates a new time expression.
|
Timex3 |
newTimex3(String id,
Span<WF> mentions,
String type)
Creates a Timex3 object to load an existing time expression.
|
Timex3 |
newTimex3(String id,
String type)
Creates a Timex3 object to load an existing time expression.
|
TLink |
newTLink(String id,
TLinkReferable from,
TLinkReferable to,
String relType) |
TLink |
newTLink(TLinkReferable from,
TLinkReferable to,
String relType) |
Topic |
newTopic()
Creates a Topic object and adds it to the document
|
Topic |
newTopic(String label,
float probability) |
WF |
newWF(String form)
Deprecated
|
WF |
newWF(String form,
int offset)
Creates a new WF object.
|
WF |
newWF(String form,
int offset,
int sent)
Creates a new WF object.
|
WF |
newWF(String id,
String form)
Deprecated
|
WF |
newWF(String id,
String form,
int sent)
Creates a WF object to load an existing word form.
|
static Span<WF> |
newWFSpan() |
static Span<WF> |
newWFSpan(List<WF> targets) |
static Span<WF> |
newWFSpan(List<WF> targets,
WF head) |
void |
print()
Prints the document on standard output.
|
void |
removeAnnotation(Object annotation) |
void |
removeAnnotations(Iterable<?> annotations) |
void |
removeLayer(KAFDocument.Layer layer) |
void |
save(File file)
Saves the KAF document to an XML file.
|
void |
save(String filename)
Saves the KAF document to an XML file.
|
void |
setLang(String lang)
Sets the language of the processed document
|
void |
setRawText(String rawText)
Sets the raw text
|
void |
setVersion(String version)
Sets the KAF version
|
KAFDocument |
split(List<WF> wfs)
Returns a new document containing all annotations related to the given WFs
|
String |
toJsonString() |
String |
toString() |
public static KAFDocument createFromFile(File file) throws IOException, org.jdom2.JDOMException
file
- an existing KAF file to be loaded into the library.
IOException
org.jdom2.JDOMException
public static KAFDocument createFromStream(Reader stream) throws IOException
stream
- Reader to read KAF content.
IOException
public void setLang(String lang)
public String getLang()
public void setVersion(String version)
public String getVersion()
public LinguisticProcessor addLinguisticProcessor(String layer, String name)
public LinguisticProcessor addLinguisticProcessor(String layer, LinguisticProcessor linguisticProcessor)
public void addLinguisticProcessors(Map<String,List<LinguisticProcessor>> lps)
public Map<String,List<LinguisticProcessor>> getLinguisticProcessors()
public boolean linguisticProcessorExists(String layer, String name, String version)
public boolean linguisticProcessorExists(String layer, String name)
public KAFDocument.FileDesc createFileDesc()
public KAFDocument.FileDesc getFileDesc()
public KAFDocument.Public createPublic()
public KAFDocument.Public getPublic()
public void setRawText(String rawText)
public WF newWF(String id, String form, int sent)
id
- word form's ID.form
- text of the word form itself.public WF newWF(String form, int offset)
form
- text of the word form itself.public WF newWF(String form, int offset, int sent)
form
- text of the word form itself.public Term newTerm(String id, Span<WF> span)
id
- term's ID.type
- type of term. There are two types of term: open and close.lemma
- the lemma of the term.pos
- part of speech of the term.wfs
- the list of word forms this term is formed by.public Term newTerm(Span<WF> span)
type
- the type of the term. There are two types of term: open and close.lemma
- the lemma of the term.pos
- part of speech of the term.wfs
- the list of word forms this term is formed by.public Term newTermOptions(String morphofeat, Span<WF> span)
type
- the type of the term. There are two types of term: open and close.lemma
- the lemma of the term.pos
- part of speech of the term.wfs
- the list of word forms this term is formed by.public Term.Sentiment newSentiment()
public Dep newDep(Term from, Term to, String rfunc)
from
- the origin term of the dependency.to
- the target term of the dependency.rfunc
- relational function of the dependency.public Chunk newChunk(String id, String phrase, Span<Term> span)
id
- chunk's ID.head
- the chunk head.phrase
- type of the phrase.terms
- the list of the terms in the chunk.public Chunk newChunk(String phrase, Span<Term> span)
head
- the chunk head.phrase
- type of the phrase.terms
- the list of the terms in the chunk.public Entity newEntity(String id, List<Span<Term>> references)
id
- the ID of the named entity.type
- entity type. 8 values are possible: Person, Organization, Location, Date, Time, Money, Percent, Misc.
references
- it contains one or more span elements. A span can be used to reference the different occurrences of the same named entity in the document. If the entity is composed by multiple words, multiple target elements are used.public Entity newEntity(List<Span<Term>> references)
type
- entity type. 8 values are possible: Person, Organization, Location, Date, Time, Money, Percent, Misc.
references
- it contains one or more span elements. A span can be used to reference the different occurrences of the same named entity in the document. If the entity is composed by multiple words, multiple target elements are used.public Coref newCoref(String id, List<Span<Term>> mentions)
id
- the ID of the coreference.references
- different mentions (list of targets) to the same entity.public Coref newCoref(List<Span<Term>> mentions)
references
- different mentions (list of targets) to the same entity.public Timex3 newTimex3(String id, Span<WF> mentions, String type)
id
- the ID of the coreference.references
- different mentions (list of targets) to the same entity.public Timex3 newTimex3(Span<WF> mentions, String type)
references
- different mentions (list of targets) to the same entity.public Timex3 newTimex3(String id, String type)
id
- the ID of the coreference.references
- different mentions (list of targets) to the same entity.public Timex3 newTimex3(String type)
references
- different mentions (list of targets) to the same entity.public TLink newTLink(String id, TLinkReferable from, TLinkReferable to, String relType)
public TLink newTLink(TLinkReferable from, TLinkReferable to, String relType)
public Factuality newFactuality(Term term)
term
- the Term of the coreference.public LinkedEntity newLinkedEntity(String id, Span<WF> span)
id
- the entity IDterm
- the Term of the coreferencepublic LinkedEntity newLinkedEntity(Span<WF> span)
term
- the Term of the coreference.public SSTspan newSST(Span<Term> span)
term
- the Term of the coreference.public Topic newTopic()
term
- the Term of the coreference.public Feature newProperty(String id, String lemma, List<Span<Term>> references)
id
- the ID of the property.lemma
- the lemma of the property.references
- different mentions (list of targets) to the same property.public Feature newProperty(String lemma, List<Span<Term>> references)
lemma
- the lemma of the property.references
- different mentions (list of targets) to the same property.public Feature newCategory(String id, String lemma, List<Span<Term>> references)
id
- the ID of the category.lemma
- the lemma of the category.references
- different mentions (list of targets) to the same category.public Feature newCategory(String lemma, List<Span<Term>> references)
lemma
- the lemma of the category.references
- different mentions (list of targets) to the same category.public Opinion newOpinion()
public Opinion newOpinion(String id)
public Relation newRelation(Relational from, Relational to)
from
- source of the relationto
- target of the relationpublic Relation newRelation(String id, Relational from, Relational to)
id
- the ID of the relationfrom
- source of the relationto
- target of the relationpublic Predicate newPredicate(String id, Span<Term> span)
id
- the ID of the predicatespan
- span containing the targets of the predicatepublic Predicate newPredicate(Span<Term> span)
span
- span containing all the targets of the predicatepublic Predicate.Role newRole(String id, Predicate predicate, String semRole, Span<Term> span)
id
- role's ID.predicate
- the predicate which this role is part ofsemRole
- semantic rolespan
- span containing all the targets of the rolepublic Predicate.Role newRole(Predicate predicate, String semRole, Span<Term> span)
predicate
- the predicate which this role is part ofsemRole
- semantic rolespan
- span containing all the targets of the rolepublic ExternalRef newExternalRef(String resource, String reference)
resource
- indicates the identifier of the resource referred to.reference
- code of the referred element.public void addConstituencyFromParentheses(String parseOut) throws Exception
Exception
public void addConstituencyFromParentheses(String parseOut, Integer sentence) throws Exception
Exception
public NonTerminal newNonTerminal(String id, String label)
public NonTerminal newNonTerminal(String label)
public String getRawText()
public List<List<WF>> getSentences()
public Integer getFirstSentence()
public Integer getNumSentences()
public Integer getFirstParagraph()
public Integer getNumParagraphs()
public List<Term> getTermsByWFs(List<WF> wfs)
wfs
- a list of word forms whose terms will be found.public List<LinkedEntity> getLinkedEntities()
public List<Feature> getProperties()
public List<Feature> getCategories()
public List<org.jdom2.Element> getUnknownLayers()
public KAFDocument split(List<WF> wfs)
public void join(KAFDocument doc)
public void insertDep(Dep dep)
public void save(String filename)
filename
- name of the file in which the document will be saved.public void save(File file)
filename
- name of the file in which the document will be saved.public String toJsonString()
public void print()
public LinguisticProcessor addLinguisticProcessor(String layer, String name, String version)
public LinguisticProcessor addLinguisticProcessor(String layer, String name, String timestamp, String version)
public Term newTerm(String id, String type, String lemma, String pos, Span<WF> span)
public Term newTermOptions(String type, String lemma, String pos, String morphofeat, Span<WF> span)
public Term createTerm(String id, String type, String lemma, String pos, List<WF> wfs)
public Term createTermOptions(String type, String lemma, String pos, String morphofeat, List<WF> wfs)
public Term.Sentiment createSentiment()
public Chunk createChunk(String id, Term head, String phrase, List<Term> terms)
public Entity createEntity(String id, String type, List<List<Term>> references)
public Feature createProperty(String id, String lemma, List<List<Term>> references)
public Feature createCategory(String id, String lemma, List<List<Term>> references)
public Opinion createOpinion()
public Relation createRelation(Relational from, Relational to)
public Relation createRelation(String id, Relational from, Relational to)
public ExternalRef createExternalRef(String resource, String reference)
public static Target createTarget(Term term)
term
- target term.public static Target createTarget(Term term, boolean isHead)
term
- target term.isHead
- a boolean argument which defines whether the target term is the head or not.public void removeLayer(KAFDocument.Layer layer)
public void removeAnnotations(Iterable<?> annotations)
public void removeAnnotation(Object annotation)
public List<Term> getTermsFromWFs(List<String> wfIds)
wfIds
- a list of word form IDs whose terms will be found.public Set<Term> getTermsByDepAncestors(Iterable<Term> ancestors, String pattern)
public Set<Term> getTermsByDepDescendants(Iterable<Term> descendents, String pattern)
public List<Factuality> getFactualities()
public static void main(String[] args)
Copyright © 2016–2020 FBK. All rights reserved.