add_fld_metrics KeyError

I'm running the sc-framework ATAC/02_QC_filtering notebook. When I attempt to run add_fld_metrics, the following error occurs:

Count insertsizes from fragments...
Starting counting fragments...

Processing Chunks: 394it [44:15,  6.74s/it]

Done reading file - elapsed time: 0:44:28
Converting counts to dataframe...

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[15], line 2
      1 if calculate_fld_score and not use_checkpoint:
----> 2     fld.add_fld_metrics(adata=adata,
      3                         fragments=fragments_file,
      4                         barcode_col=None,
      5                         barcode_tag=barcode_tag,
      6                         chunk_size_bam=1000000,
      7                         regions=None,
      8                         peaks_thr=10,
      9                         wavelength=150,
     10                         sigma=0.4,
     11                         plot=False,
     12                         save_density=None,
     13                         save_overview=None,
     14                         sample=0)
     16     adata.obs

File <@beartype(peakqc.fld_scoring.add_fld_metrics) at 0x7f5654d75c60>:327, in add_fld_metrics(__beartype_object_94051778286480, __beartype_get_violation, __beartype_conf, __beartype_object_140015359174272, __beartype_object_94051761317008, __beartype_object_94050801081376, __beartype_object_94051772755696, __beartype_object_140008775532800, __beartype_object_94051766036256, __beartype_object_94051766176368, __beartype_check_meta, __beartype_func, *args, **kwargs)

File /workspace/.conda/sctoolbox-0.12/lib/python3.12/site-packages/peakqc/fld_scoring.py:1111, in add_fld_metrics(adata, fragments, barcode_col, barcode_tag, chunk_size_bam, chunk_size_fragments, regions, peaks_thr, wavelength, sigma, plot, save_density, save_overview, sample, n_threads, return_distributions)
   1104     count_table = insertsizes.insertsize_from_bam(bamfile=fragments,
   1105                                                   barcodes=adata_barcodes,
   1106                                                   barcode_tag=barcode_tag,
   1107                                                   chunk_size=chunk_size_bam,
   1108                                                   regions=regions)
   1110 elif bed:
-> 1111     count_table = insertsizes.insertsize_from_fragments(fragments=fragments,
   1112                                                         barcodes=adata_barcodes,
   1113                                                         chunk_size=chunk_size_fragments,
   1114                                                         n_threads=8)
   1116 # get the mean insert size and the insert size counts separately
   1117 means = count_table.pop('mean_insertsize')

File <@beartype(peakqc.insertsizes.insertsize_from_fragments) at 0x7f5654cf3880>:102, in insertsize_from_fragments(__beartype_get_violation, __beartype_conf, __beartype_object_94050801081376, __beartype_getrandbits, __beartype_object_94051772755696, __beartype_check_meta, __beartype_func, *args, **kwargs)

File /workspace/.conda/sctoolbox-0.12/lib/python3.12/site-packages/peakqc/insertsizes.py:182, in insertsize_from_fragments(fragments, barcodes, chunk_size, n_threads)
    180 table = pd.DataFrame.from_dict(count_dict, orient="index")
    181 # round mean_insertsize to 2 decimals
--> 182 table["mean_insertsize"] = table["mean_insertsize"].round(2)
    184 print("Done getting insertsizes from fragments!")
    186 return table

File /workspace/.conda/sctoolbox-0.12/lib/python3.12/site-packages/pandas/core/frame.py:4102, in DataFrame.__getitem__(self, key)
   4100 if self.columns.nlevels > 1:
   4101     return self._getitem_multilevel(key)
-> 4102 indexer = self.columns.get_loc(key)
   4103 if is_integer(indexer):
   4104     indexer = [indexer]

File /workspace/.conda/sctoolbox-0.12/lib/python3.12/site-packages/pandas/core/indexes/range.py:417, in RangeIndex.get_loc(self, key)
    415         raise KeyError(key) from err
    416 if isinstance(key, Hashable):
--> 417     raise KeyError(key)
    418 self._check_indexing_error(key)
    419 raise KeyError(key)

KeyError: 'mean_insertsize'
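
As far as I can tell, the KeyError at insertsizes.py:182 is what pandas raises when the counting step returns an empty dict, because the DataFrame built from it then has no columns at all. A minimal sketch of what I suspect is happening (my reconstruction, not the actual peakqc internals):

import pandas as pd

# Assumption: if no fragment is ever assigned to a barcode, the
# per-barcode count dict stays empty. A DataFrame built from an empty
# dict has zero columns (a bare RangeIndex), so the column lookup
# fails exactly as in the traceback above.
count_dict = {}  # nothing was counted
table = pd.DataFrame.from_dict(count_dict, orient="index")
table["mean_insertsize"].round(2)  # raises KeyError: 'mean_insertsize'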

The fragments file /mnt/workspace2/hschult/sc-framework-paper/data/human_heart/preprocessing/atac_output_forward-reverse/bam/all_fragments.bed is, at 80 GB, quite large. Could this be the issue?
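
In case it helps with triage, this is the quick check I would run next (a hypothetical snippet, assuming the usual BED-like fragments layout of chrom, start, end, barcode, count): sample barcodes from the head of the file and compare them against adata.obs_names. If the overlap is empty, the cause would be mismatched barcodes rather than file size.

# Hypothetical sanity check; assumes barcodes sit in the 4th column of
# the fragments file and that adata is loaded in the notebook session.
frag_barcodes = set()
with open("/mnt/workspace2/hschult/sc-framework-paper/data/human_heart/preprocessing/atac_output_forward-reverse/bam/all_fragments.bed") as fh:
    for i, line in enumerate(fh):
        if line.startswith("#"):
            continue  # skip header/comment lines
        frag_barcodes.add(line.rstrip("\n").split("\t")[3])
        if i >= 100_000:  # a sample of the head is enough
            break

overlap = frag_barcodes & set(adata.obs_names)
print(f"{len(overlap)} of {len(frag_barcodes)} sampled fragment barcodes match adata")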