from pickle import TRUE
import streamlit as st
import spacy
from spacy import displacy, load
import re
import csv
# Streamlit page setup -- must be the first st.* call executed in the script.
st.set_page_config(
    page_title="NER for SG Locations",
    # NOTE(review): icon string looks mojibake-garbled (presumably an emoji,
    # likely the SG flag) -- confirm the intended glyph against the repo.
    page_icon="πΈπ¬",
)
def _max_width_(max_width_px: int = 1400) -> None:
    """Widen Streamlit's main content container via a CSS override.

    Streamlit's default block container is fairly narrow; this injects a
    ``max-width`` rule so the side-by-side entity renderings have room.

    Args:
        max_width_px: Maximum width of the content area in pixels.
            Defaults to 1400, the value originally hard-coded here.
    """
    # The original used an f-string with no placeholder for the width; the
    # width is now interpolated directly into the style block.
    st.markdown(
        f"""
        <style>
        .main .block-container{{
            max-width: {max_width_px}px;
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )


_max_width_()
@st.cache(show_spinner=False, allow_output_mutation=True, suppress_st_warning=True)
def load_models():
    """Load and cache the four spaCy pipelines the app compares.

    Returns:
        dict: pipelines keyed by short code --
            ``"std"``: stock ``en_core_web_md`` English model,
            ``"erl"``: dictionary-centric model v2.1,
            ``"dcn"``: NER model v3.0 (best checkpoint),
            ``"v31"``: NER model v3.1 (best checkpoint).

    NOTE(review): ``st.cache`` with ``allow_output_mutation`` is deprecated in
    recent Streamlit releases in favour of ``st.cache_resource`` -- consider
    migrating when upgrading Streamlit.
    """
    standard_model = spacy.load("en_core_web_md")
    er_model = spacy.load("./models/model_v2.1")
    doccano_model = spacy.load("./models/model_v3.0/model-best")
    v31_model = spacy.load("./models/model_v3.1/model-best")
    models = {"std": standard_model, "erl": er_model,
              "dcn": doccano_model, "v31": v31_model}
    return models
# Instantiate the (cached) spaCy pipelines once at startup.
models = load_models()

# Abbreviation lookup table: each CSV row holds abbreviated spellings in
# columns 0-2 and the full form in column 3 (see lengthen_abbreviations).
# encoding is pinned to UTF-8 so behaviour does not depend on the host
# locale, and newline="" is the csv-module-recommended open mode.
with open("./data/extracted_locations/sg_abbreviations.csv", "r",
          encoding="utf-8", newline="") as csv_file:
    abbreviation_dictionary = list(csv.reader(csv_file, delimiter=","))
def lengthen_abbreviations(text, abbreviations=None):
    """Expand known abbreviations in *text* to their full written form.

    The text is split into word / punctuation / whitespace tokens; any token
    equal to one of a row's abbreviated spellings (columns 0-2) -- or to the
    full form itself in column 3, a harmless self-replacement -- is replaced
    by that row's full form (column 3).  When a token matches several rows,
    the last matching row wins, preserving the original scan order.

    Args:
        text: Raw input text.
        abbreviations: Optional list of 4-column rows
            ``[abbr1, abbr2, abbr3, full_form]``.  Defaults to the
            module-level ``abbreviation_dictionary`` loaded from CSV.

    Returns:
        str: The text with abbreviations expanded.
    """
    if abbreviations is None:
        abbreviations = abbreviation_dictionary
    # Tokenise into words, punctuation(+trailing space), single spaces and
    # hyphens so the pieces re-join with no separator.
    tokens = re.findall(r"[\w']+|[.,!?;&] | |-", text)
    for i, token in enumerate(tokens):
        for row in abbreviations:
            if token in row[:4]:
                tokens[i] = row[3]
    return ''.join(tokens)
# Page header and tagline shown above the input form.
st.title("Named Entity Recogniser for Singapore Locations π")
st.write("Compare the Standard English NLP Model with the Trained SG Location Names Model.")
def clear_form():
    """Blank the text area (session-state key ``1``, matching the widget's
    ``key=1``) when the Clear button is pressed."""
    st.session_state[1] = ""
# Display names of the four models offered in the multiselect widget.
_ALL_MODEL_NAMES = ['Standard Model', 'Dictionary Model',
                    'NER Model 3.0', 'NER Model 3.1']


def select_models(all_models_selected):
    """Render the model multiselect inside the form's top container.

    Args:
        all_models_selected: When truthy ("Select all models" checkbox),
            every model is pre-selected.

    Returns:
        list[str]: The model display names chosen by the user.
    """
    # Single option list replaces the previously duplicated literals; the
    # default selection is everything or nothing.
    default = _ALL_MODEL_NAMES if all_models_selected else []
    return container.multiselect(
        "Choose one or more models to analyse text with:",
        _ALL_MODEL_NAMES,
        default,
    )
def find_ents(model, input, abr_lengthen):
    """Run *model* over the text and render its entities inline.

    Args:
        model: A loaded spaCy pipeline.
        input: The text to analyse.  (Name kept for caller compatibility
            even though it shadows the ``input`` builtin.)
        abr_lengthen: When truthy, expand known abbreviations first via
            ``lengthen_abbreviations``.
    """
    # Truthiness test replaces the previous `== True` comparison.
    text = lengthen_abbreviations(input) if abr_lengthen else input
    doc = model(text)
    # displacy emits standalone HTML; render it through markdown with HTML on.
    ent_html = displacy.render(doc, style="ent", jupyter=False)
    st.markdown(ent_html, unsafe_allow_html=True)
    st.write("")
# Maps each UI display name to a (section header, spaCy pipeline) pair
# consumed by display_models().  NOTE(review): the header strings contain
# mojibake-garbled characters (presumably emoji) -- confirm against the repo.
model_choice = {"Standard Model": ("Pre-trained Standard English Model π", models["std"]),
                "Dictionary Model": ("Dictionary-centric Model for SG Locations π", models["erl"]),
                "NER Model 3.0": ("Enhanced NER-centric Model 3.0 for SG Locations π¦", models["dcn"]),
                "NER Model 3.1": ("Enhanced NER-centric Model 3.1 for SG Locations π", models["v31"])
                }
def display_models(selected_models, text_input, abbreviation_status):
    """Render a section header plus entity visualisation for each chosen model.

    Args:
        selected_models: Display names chosen in the multiselect; each must
            be a key of ``model_choice``.
        text_input: The raw text to analyse.
        abbreviation_status: Forwarded to ``find_ents`` to control
            abbreviation expansion.
    """
    for name in selected_models:
        header, pipeline = model_choice[name]
        st.header(header)
        find_ents(pipeline, text_input, abbreviation_status)
# --- Input form: model choice, options, text area and action buttons. ---
with st.form("NER_form"):
    # Container created first so the multiselect rendered later by
    # select_models() appears above the checkboxes on the page.
    container = st.container()
    c_all_model_selection, c_abbreviate_selection, c_selection_last = st.columns([
        1, 1, 3])
    with c_all_model_selection:
        all_models_selected = st.checkbox("Select all models")
    with c_abbreviate_selection:
        abbreviation_status = st.checkbox("Lengthen abbreviations")
    selected_models = select_models(all_models_selected)
    text_input = st.empty()
    # key=1 ties this widget to clear_form(), which blanks st.session_state[1].
    input = text_input.text_area('Text to analyze:', key=1)
    c_submit, c_clear, c_last = st.columns([1, 1, 5])
    with c_submit:
        submitted = st.form_submit_button("Find Locations π")
    with c_clear:
        click_clear = st.form_submit_button(
            'Clear text input β«', on_click=clear_form)
# Run the analysis only after an explicit submit.
if submitted:
    display_models(selected_models, input, abbreviation_status)
# --- Collapsible "about" section and trailing spacer. ---
with st.expander("βΉοΈ - About this app", expanded=False):
    st.write(
        """
- This *Named Entity Recognition model for Singapore Location Names* detects Singaporean addresses, place names and building names from text.
- It was made with [spaCy v3](https://spacy.io/), an open-source library for Natural Language Processing.
- Check out the source code here on my [Github repo](https://github.com/drgnfrts/Singapore-Locations-NER)! :)
"""
    )
st.markdown("")