  * [[https://
  * [[https://
  * [[https://github.com/PLN-FaMAF/PLN-2019/
===== Exercise 4: Hidden Markov Models and the Viterbi Algorithm =====

  * Implement a Hidden Markov Model whose parameters are the transition probabilities between states (the tags) and the emission probabilities of symbols (the words).
  * Implement the Viterbi algorithm, which computes the most probable tagging of a sentence (a sketch is included after the interface below).

Interface of ''HMM'' and ''ViterbiTagger'':

<code python>
class HMM:

    def __init__(self, n, tagset, trans, out):
        """
        n -- n-gram size.
        tagset -- set of tags.
        trans -- transition probabilities dictionary.
        out -- output probabilities dictionary.
        """

    def tagset(self):
        """Returns the set of tags.
        """

    def trans_prob(self, tag, prev_tags):
        """Probability of a tag given the previous n-1 tags.

        tag -- the tag.
        prev_tags -- tuple with the previous n-1 tags (optional only if n = 1).
        """

    def out_prob(self, word, tag):
        """Probability of a word given a tag.

        word -- the word.
        tag -- the tag.
        """

    def tag_prob(self, y):
        """
        Probability of a tagging.
        Warning: subject to underflow problems.

        y -- tagging.
        """

    def prob(self, x, y):
        """
        Joint probability of a sentence and its tagging.
        Warning: subject to underflow problems.

        x -- sentence.
        y -- tagging.
        """

    def tag_log_prob(self, y):
        """
        Log-probability of a tagging.

        y -- tagging.
        """

    def log_prob(self, x, y):
        """
        Joint log-probability of a sentence and its tagging.

        x -- sentence.
        y -- tagging.
        """

    def tag(self, sent):
        """Returns the most probable tagging for a sentence.

        sent -- the sentence.
        """


class ViterbiTagger:

    def __init__(self, hmm):
        """
        hmm -- the HMM.
        """

    def tag(self, sent):
        """Returns the most probable tagging for a sentence.

        sent -- the sentence.
        """
</code>

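As a quick sanity check, the interface can be driven with a small hand-written model. The following snippet is only an illustrative sketch: it assumes that ''trans'' maps tuples of the previous n-1 tags to distributions over the next tag, that ''out'' maps tags to distributions over words, and that ''<s>'' and ''</s>'' are used as sentence delimiter tags; the toy probabilities are invented for the example.

<code python>
# Hypothetical toy model (values invented for illustration): a bigram HMM
# over three tags, using the dictionary formats assumed above.
tagset = {'D', 'N', 'V'}
trans = {
    ('<s>',): {'D': 1.0},
    ('D',): {'N': 1.0},
    ('N',): {'V': 0.8, '</s>': 0.2},
    ('V',): {'N': 0.6, '</s>': 0.4},
}
out = {
    'D': {'el': 1.0},
    'N': {'gato': 0.6, 'pescado': 0.4},
    'V': {'come': 1.0},
}

hmm = HMM(2, tagset, trans, out)
tagger = ViterbiTagger(hmm)
print(tagger.tag('el gato come pescado'.split()))  # expected: ['D', 'N', 'V', 'N']
</code>

The Viterbi algorithm itself is not spelled out by the interface; the following is a minimal sketch of the dynamic program over log-probabilities, assuming the model exposes its order as an ''n'' attribute and uses the delimiter tags above. The names ''viterbi_tag'' and ''pi'' are illustrative only, not part of the required interface.

<code python>
from math import log2


def viterbi_tag(hmm, sent):
    """Most probable tagging of sent under hmm (sketch only).

    hmm -- model exposing n, tagset(), trans_prob() and out_prob().
    sent -- list of words.
    """
    n = hmm.n  # assumption: the model stores its order in an attribute
    # pi maps a context (tuple of the last n-1 tags) to the best
    # (log-probability, tag sequence) pair reaching that context.
    pi = {('<s>',) * (n - 1): (0.0, [])}

    for word in sent:
        new_pi = {}
        for prev_tags, (lp, tags) in pi.items():
            for tag in hmm.tagset():
                tp = hmm.trans_prob(tag, prev_tags)
                op = hmm.out_prob(word, tag)
                if tp == 0.0 or op == 0.0:
                    continue  # impossible extension, prune it
                new_lp = lp + log2(tp) + log2(op)
                context = (prev_tags + (tag,))[1:]
                if context not in new_pi or new_lp > new_pi[context][0]:
                    new_pi[context] = (new_lp, tags + [tag])
        pi = new_pi

    # close the sentence with the end-of-sentence transition, keep the best path
    _, best_tags = max(
        (lp + log2(hmm.trans_prob('</s>', prev_tags)), tags)
        for prev_tags, (lp, tags) in pi.items()
        if hmm.trans_prob('</s>', prev_tags) > 0.0
    )
    return best_tags
</code>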

Tests:

  $ nosetests tagging/
  $ nosetests tagging/

Documentation:

  * [[http://

===== Exercise 5: HMM POS Tagger =====

  * Implement, in a class ''MLHMM'', a Hidden Markov Model whose parameters are estimated by maximum likelihood from a corpus of tagged sentences (a sketch of the estimators is included after the interface below).
  * The class must have **the same interface as ''HMM''**.
  * Add to the training script (train.py) a command-line option that allows training the MLHMM with different values of ''n'' (a possible sketch is included at the end of this exercise).
  * Train and evaluate for several values of ''n''.

Interface of ''MLHMM'':

<code python>
class MLHMM:

    def __init__(self, n, tagged_sents, addone=True):
        """
        n -- order of the model.
        tagged_sents -- training sentences, each one being a list of pairs.
        addone -- whether to use addone smoothing (default: True).
        """

    def tcount(self, tokens):
        """Count for an n-gram or (n-1)-gram of tags.

        tokens -- the n-gram or (n-1)-gram tuple of tags.
        """

    def unknown(self, w):
        """Check whether a word is unknown for the model.

        w -- the word.
        """

    """
    All the methods of HMM.
    """
</code>

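Not part of the required interface: a minimal sketch of the maximum likelihood estimates that ''MLHMM'' is expected to compute, using the same delimiter-tag convention as above. The function and variable names (''collect_counts'', ''tcounts'', ''tag_counts'', ''out_counts'') are illustrative, and giving unknown words a uniform probability over the vocabulary is just one possible design choice.

<code python>
from collections import defaultdict


def collect_counts(n, tagged_sents):
    """Counts needed for maximum likelihood estimation (sketch only).

    n -- order of the model.
    tagged_sents -- sentences as lists of (word, tag) pairs.
    """
    tcounts = defaultdict(int)     # tag n-gram and (n-1)-gram counts (transitions)
    tag_counts = defaultdict(int)  # single tag counts (emission denominators)
    out_counts = defaultdict(int)  # (tag, word) counts (emission numerators)
    vocabulary, tagset = set(), set()

    for sent in tagged_sents:
        for word, tag in sent:
            vocabulary.add(word)
            tagset.add(tag)
            tag_counts[tag] += 1
            out_counts[(tag, word)] += 1
        # pad the tag sequence with delimiter tags, as in the HMM above
        tags = ('<s>',) * (n - 1) + tuple(t for _, t in sent) + ('</s>',)
        for i in range(len(tags) - n + 1):
            tcounts[tags[i:i + n]] += 1      # full n-gram
            tcounts[tags[i:i + n - 1]] += 1  # its (n-1)-gram prefix

    return tcounts, tag_counts, out_counts, vocabulary, tagset


def trans_prob(tag, prev_tags, tcounts, tagset, addone=True):
    """q(tag | prev_tags), optionally with add-one smoothing over the tagset."""
    num, den = tcounts[prev_tags + (tag,)], tcounts[prev_tags]
    if addone:
        return (num + 1) / (den + len(tagset))
    return num / den if den > 0 else 0.0


def out_prob(word, tag, tag_counts, out_counts, vocabulary):
    """e(word | tag); unknown words get a uniform probability over the vocabulary."""
    if word not in vocabulary:
        return 1.0 / len(vocabulary)
    den = tag_counts[tag]
    return out_counts[(tag, word)] / den if den > 0 else 0.0
</code>

With counts like these, ''tcount'' reduces to a dictionary lookup and ''unknown'' to a membership test against the vocabulary.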

Tests:

  $ nosetests tagging/

Documentation:

  * [[http://

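The training script itself is not shown on this page, so the following is only a hypothetical sketch of the command-line option mentioned above, assuming an argparse-based ''train.py''; the actual script may use a different option parser and different option names.

<code python>
import argparse

# Hypothetical option handling for train.py (option names and defaults are invented).
parser = argparse.ArgumentParser(description='Train a POS tagging model.')
parser.add_argument('-m', '--model', default='base', choices=['base', 'mlhmm'],
                    help='model type to train')
parser.add_argument('-n', type=int, default=2,
                    help='order of the MLHMM (size of the tag n-grams)')
parser.add_argument('--no-addone', action='store_true',
                    help='disable add-one smoothing in the MLHMM')
parser.add_argument('-o', '--output', required=True,
                    help='file where the trained model is saved')
args = parser.parse_args()
</code>

Under these assumptions the script would be invoked, for example, as ''python train.py -m mlhmm -n 3 -o mlhmm3.model''.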

===== Exercise 6: "Three words" Classifier =====

**TBA**

===== Exercise 7: Classifier with Embeddings =====

**TBA**

===== Exercise 8: Error Analysis and New Features =====

**TBA**

===== Exercise 9: Recurrent Neural Network =====

**TBA**

/*
===== Exercise
===== Exercise 10: Recurrent Neural Network =====
  * https://
  * https://
*/