Python库|oh_my_tools_package-0.0.3b9.tar.gz资源-CSDN文库

版权申诉

73 浏览量 2022-04-12 07:30:38 上传评论收藏 15KB GZ 举报

共20个文件

py：19个

pkg-info：1个

资源推荐

资源详情

资源评论

收起资源包目录

oh_my_tools_package-0.0.3b9.tar.gz （20个子文件）

oh_my_tools_package-0.0.3b9

PKG-INFO 253B

my_tools_package

trie.py 2KB

text_utils.py 599B

transformer_tools.py 1KB

gpu_utils.py 4KB

encry.py 590B

__init__.py 2KB

file_writer.py 566B

__init__.py 205B

file_utils.py 2KB

file_reader.py 2KB

seed.py 867B

torch_utils

tensorboard_utils.py 3KB

loss.py 225B

metrics.py 230B

crf_nbest.py 20KB

__init__.py 206B

base.py 13KB

label_studio.py 5KB

setup.py 863B

from typing import List, Optional

import torch
import torch.nn as nn


class CRF(nn.Module):
    """Conditional random field.

    This module implements a conditional random field [LMP01]_. The forward computation
    of this class computes the log likelihood of the given sequence of tags and
    emission score tensor. This class also has `~CRF.decode` method which finds
    the best tag sequence given an emission score tensor using `Viterbi algorithm`_.

    Args:
        num_tags: Number of tags.
        batch_first: Whether the first dimension corresponds to the size of a minibatch.

    Attributes:
        start_transitions (`~torch.nn.Parameter`): Start transition score tensor of size
            ``(num_tags,)``.
        end_transitions (`~torch.nn.Parameter`): End transition score tensor of size
            ``(num_tags,)``.
        transitions (`~torch.nn.Parameter`): Transition score tensor of size
            ``(num_tags, num_tags)``.

    Raises:
        ValueError: If ``num_tags`` is not positive.

    .. [LMP01] Lafferty, J., McCallum, A., Pereira, F. (2001).
       "Conditional random fields: Probabilistic models for segmenting and
       labeling sequence data". *Proc. 18th International Conf. on Machine
       Learning*. Morgan Kaufmann. pp. 282–289.

    .. _Viterbi algorithm: https://en.wikipedia.org/wiki/Viterbi_algorithm
    """

    def __init__(self, num_tags: int, batch_first: bool = False) -> None:
        if num_tags <= 0:
            raise ValueError(f'invalid number of tags: {num_tags}')
        super().__init__()
        self.num_tags = num_tags
        self.batch_first = batch_first
        self.start_transitions = nn.Parameter(torch.empty(num_tags))
        self.end_transitions = nn.Parameter(torch.empty(num_tags))
        self.transitions = nn.Parameter(torch.empty(num_tags, num_tags))

        self.reset_parameters()

    def reset_parameters(self) -> None:
        """Initialize the transition parameters.

        The parameters will be initialized randomly from a uniform distribution
        between -0.1 and 0.1.
        """
        nn.init.uniform_(self.start_transitions, -0.1, 0.1)
        nn.init.uniform_(self.end_transitions, -0.1, 0.1)
        nn.init.uniform_(self.transitions, -0.1, 0.1)

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}(num_tags={self.num_tags})'

    def forward(self, emissions: torch.Tensor,
                tags: torch.LongTensor,
                mask: Optional[torch.ByteTensor] = None,
                reduction: str = 'mean') -> torch.Tensor:
        """Compute the conditional log likelihood of a sequence of tags given emission scores.

        Negative tag ids are mapped to tag ``0`` before scoring.

        Args:
            emissions (`~torch.Tensor`): Emission score tensor of size
                ``(seq_length, batch_size, num_tags)`` if ``batch_first`` is ``False``,
                ``(batch_size, seq_length, num_tags)`` otherwise.
            tags (`~torch.LongTensor`): Sequence of tags tensor of size
                ``(seq_length, batch_size)`` if ``batch_first`` is ``False``,
                ``(batch_size, seq_length)`` otherwise.
            mask (`~torch.ByteTensor`): Mask tensor of size ``(seq_length, batch_size)``
                if ``batch_first`` is ``False``, ``(batch_size, seq_length)`` otherwise.
                Defaults to an all-ones mask; any other dtype is converted to ``uint8``.
            reduction: Specifies the reduction to apply to the output:
                ``none|sum|mean|token_mean``. ``none``: no reduction will be applied.
                ``sum``: the output will be summed over batches. ``mean``: the output will be
                averaged over batches. ``token_mean``: the output will be averaged over tokens.

        Returns:
            `~torch.Tensor`: The log likelihood. This will have size ``(batch_size,)`` if
            reduction is ``none``, ``()`` otherwise.

        Raises:
            ValueError: If ``reduction`` is not one of the supported values, or if the
                shapes of ``emissions``, ``tags`` and ``mask`` are inconsistent.
        """
        # Reject a bad ``reduction`` before doing any tensor work.
        if reduction not in ('none', 'sum', 'mean', 'token_mean'):
            raise ValueError(f'invalid reduction: {reduction}')

        # Map negative tag ids to 0 so they are valid indices into the score tensors.
        tags = tags.clamp(min=0)

        if mask is None:
            mask = torch.ones_like(tags, dtype=torch.uint8, device=tags.device)
        if mask.dtype != torch.uint8:
            mask = mask.byte()
        self._validate(emissions, tags=tags, mask=mask)

        if self.batch_first:
            # The remaining computation is done with the sequence dimension first.
            emissions = emissions.transpose(0, 1)
            tags = tags.transpose(0, 1)
            mask = mask.transpose(0, 1)

        # shape: (batch_size,)
        numerator = self._compute_score(emissions, tags, mask)
        # shape: (batch_size,)
        denominator = self._compute_normalizer(emissions, mask)
        # shape: (batch_size,)
        llh = numerator - denominator

        if reduction == 'none':
            return llh
        if reduction == 'sum':
            return llh.sum()
        if reduction == 'mean':
            return llh.mean()
        # 'token_mean': normalize by the number of unmasked tokens.
        return llh.sum() / mask.float().sum()

    def decode(self, emissions: torch.Tensor,
               mask: Optional[torch.ByteTensor] = None,
               nbest: Optional[int] = None,
               pad_tag: Optional[int] = None) -> torch.Tensor:
        """Find the most likely tag sequence using Viterbi algorithm.

        Args:
            emissions (`~torch.Tensor`): Emission score tensor of size
                ``(seq_length, batch_size, num_tags)`` if ``batch_first`` is ``False``,
                ``(batch_size, seq_length, num_tags)`` otherwise.
            mask (`~torch.ByteTensor`): Mask tensor of size ``(seq_length, batch_size)``
                if ``batch_first`` is ``False``, ``(batch_size, seq_length)`` otherwise.
                Defaults to an all-ones mask; any other dtype is converted to ``uint8``.
            nbest (`int`): Number of most probable paths for each sequence.
                ``None`` is treated as ``1``.
            pad_tag (`int`): Tag at padded positions. Often input varies in length and
                the length will be padded to the maximum length in the batch. Tags at
                the padded positions will be assigned with a padding tag, i.e. `pad_tag`

        Returns:
            A PyTorch tensor of the best tag sequence for each batch of shape
            (nbest, batch_size, seq_length)

        Raises:
            ValueError: If the shapes of ``emissions`` and ``mask`` are inconsistent.
        """
        if nbest is None:
            nbest = 1
        if mask is None:
            mask = torch.ones(emissions.shape[:2], dtype=torch.uint8,
                              device=emissions.device)
        if mask.dtype != torch.uint8:
            mask = mask.byte()
        self._validate(emissions, mask=mask)

        if self.batch_first:
            emissions = emissions.transpose(0, 1)
            mask = mask.transpose(0, 1)

        if nbest == 1:
            # Add a leading ``nbest`` dimension so both branches return the same rank.
            return self._viterbi_decode(emissions, mask, pad_tag).unsqueeze(0)
        return self._viterbi_decode_nbest(emissions, mask, nbest, pad_tag)

    def _validate(self, emissions: torch.Tensor,
                  tags: Optional[torch.LongTensor] = None,
                  mask: Optional[torch.ByteTensor] = None) -> None:
        """Check that ``emissions``, ``tags`` and ``mask`` have consistent shapes.

        Raises:
            ValueError: If ``emissions`` is not 3-dimensional, its last dimension is not
                ``num_tags``, its first two dimensions differ from the shape of ``tags``
                or ``mask``, or the mask is not fully on at the first timestep.
        """
        if emissions.dim() != 3:
            raise ValueError(f'emissions must have dimension of 3, got {emissions.dim()}')
        if emissions.size(2) != self.num_tags:
            raise ValueError(
                f'expected last dimension of emissions is {self.num_tags}, '
                f'got {emissions.size(2)}')

        if tags is not None:
            if emissions.shape[:2] != tags.shape:
                raise ValueError(
                    'the first two dimensions of emissions and tags must match, '
                    f'got {tuple(emissions.shape[:2])} and {tuple(tags.shape)}')

        if mask is not None:
            if emissions.shape[:2] != mask.shape:
                raise ValueError(
                    'the first two dimensions of emissions and mask must match, '
                    f'got {tuple(emissions.shape[:2])} and {tuple(mask.shape)}')
            # Select the first timestep along whichever axis holds the sequence.
            first_step = mask[:, 0] if self.batch_first else mask[0]
            if not first_step.all():
                raise ValueError('mask of the first timestep must all be on')

    # NOTE: the source preview is truncated at this point. The original text
    # continues with the unfinished fragment "def _compute_score(self, emissions";
    # `_compute_score`, `_compute_normalizer`, `_viterbi_decode` and
    # `_viterbi_decode_nbest` are called above but are not part of the visible text.

评论收藏

内容反馈

版权申诉