Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
elebumm
GitHub Repository: elebumm/RedditVideoMakerBot
Path: blob/master/utils/posttextparser.py
327 views
1
import os
2
import re
3
import time
4
from typing import List
5
6
import spacy
7
8
from utils.console import print_step
9
from utils.voice import sanitize_text
10
11
12
# working good
13
def posttextparser(obj, *, tried: bool = False) -> List[str]:
    """Split a post's text into a list of sentence strings using spaCy.

    Newlines in ``obj`` are replaced by spaces, the text is run through the
    ``en_core_web_sm`` spaCy pipeline, and the text of every detected
    sentence for which ``sanitize_text`` returns a truthy value is kept.

    Args:
        obj: The raw post text.
        tried: Internal flag. ``True`` when a model download has already
            been attempted, so a second load failure is reported instead of
            triggering another download.

    Returns:
        The kept sentences, in document order.

    Raises:
        OSError: If the spaCy model still cannot be loaded after one
            download attempt.
    """
    # Function-scope imports keep this block self-contained.
    import subprocess
    import sys

    text: str = re.sub("\n", " ", obj)
    try:
        nlp = spacy.load("en_core_web_sm")
    except OSError:
        if not tried:
            # Download with the interpreter that is running this program so
            # the model lands in the active environment; a bare "python" on
            # PATH may belong to a different installation. The call blocks
            # until the download finishes, so no extra sleep is needed.
            subprocess.run(
                [sys.executable or "python", "-m", "spacy", "download", "en_core_web_sm"],
                check=False,  # a failed download is reported by the retry below
            )
            return posttextparser(obj, tried=True)
        print_step(
            "The spacy model can't load. You need to install it with the command \npython -m spacy download en_core_web_sm "
        )
        raise

    doc = nlp(text)

    # Keep only sentences for which sanitize_text returns something truthy.
    return [sent.text for sent in doc.sents if sanitize_text(sent.text)]
36
37